aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Analysis')
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll788
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll21
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll27
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll54
-rw-r--r--llvm/test/Analysis/CostModel/ARM/arith-overflow.ll1023
-rw-r--r--llvm/test/Analysis/CostModel/ARM/arith-ssat.ll447
-rw-r--r--llvm/test/Analysis/CostModel/ARM/arith-usat.ll447
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/arith-fp.ll3684
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll8
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll64
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll223
-rw-r--r--llvm/test/Analysis/CostModel/X86/fshl.ll372
-rw-r--r--llvm/test/Analysis/CostModel/X86/fshr.ll372
-rw-r--r--llvm/test/Analysis/CostModel/X86/load-broadcast.ll500
-rw-r--r--llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll16
-rw-r--r--llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll16
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll10
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll256
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-load.ll108
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll8
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll216
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll16
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll260
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-splat.ll256
-rw-r--r--llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll232
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll156
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll156
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll148
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll148
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll152
-rw-r--r--llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll152
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/Banerjee.ll6
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll79
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/GCD.ll8
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll2
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/Preliminary.ll2
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll4
-rw-r--r--llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll2
-rw-r--r--llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll50
39 files changed, 5362 insertions, 5127 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
index 9801d14..388362f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
@@ -3,619 +3,619 @@
define void @fcmp_oeq(i32 %arg) {
; CHECK-LABEL: 'fcmp_oeq'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp oeq <2 x float> undef, undef
- %v4f32 = fcmp oeq <4 x float> undef, undef
- %v8f32 = fcmp oeq <8 x float> undef, undef
- %v2f64 = fcmp oeq <2 x double> undef, undef
- %v4f64 = fcmp oeq <4 x double> undef, undef
- %v2f16 = fcmp oeq <2 x half> undef, undef
- %v4f16 = fcmp oeq <4 x half> undef, undef
- %v8f16 = fcmp oeq <8 x half> undef, undef
- %v16f16 = fcmp oeq <16 x half> undef, undef
+ %v2f32 = fcmp oeq <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp oeq <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp oeq <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp oeq <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp oeq <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp oeq <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp oeq <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp oeq <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp oeq <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_oeq_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_oeq_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
- %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
- %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+ %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ogt(i32 %arg) {
; CHECK-LABEL: 'fcmp_ogt'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ogt <2 x float> undef, undef
- %v4f32 = fcmp ogt <4 x float> undef, undef
- %v8f32 = fcmp ogt <8 x float> undef, undef
- %v2f64 = fcmp ogt <2 x double> undef, undef
- %v4f64 = fcmp ogt <4 x double> undef, undef
- %v2f16 = fcmp ogt <2 x half> undef, undef
- %v4f16 = fcmp ogt <4 x half> undef, undef
- %v8f16 = fcmp ogt <8 x half> undef, undef
- %v16f16 = fcmp ogt <16 x half> undef, undef
+ %v2f32 = fcmp ogt <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ogt <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ogt <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ogt <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ogt <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ogt <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ogt <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ogt <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ogt <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ogt_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ogt_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ogt <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
- %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
- %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ogt <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_oge(i32 %arg) {
; CHECK-LABEL: 'fcmp_oge'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp oge <2 x float> undef, undef
- %v4f32 = fcmp oge <4 x float> undef, undef
- %v8f32 = fcmp oge <8 x float> undef, undef
- %v2f64 = fcmp oge <2 x double> undef, undef
- %v4f64 = fcmp oge <4 x double> undef, undef
- %v2f16 = fcmp oge <2 x half> undef, undef
- %v4f16 = fcmp oge <4 x half> undef, undef
- %v8f16 = fcmp oge <8 x half> undef, undef
- %v16f16 = fcmp oge <16 x half> undef, undef
+ %v2f32 = fcmp oge <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp oge <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp oge <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp oge <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp oge <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp oge <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp oge <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp oge <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp oge <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_oge_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_oge_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp oge <2 x bfloat> undef, undef
- %v4bf16 = fcmp oge <4 x bfloat> undef, undef
- %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+ %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_olt(i32 %arg) {
; CHECK-LABEL: 'fcmp_olt'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp olt <2 x float> undef, undef
- %v4f32 = fcmp olt <4 x float> undef, undef
- %v8f32 = fcmp olt <8 x float> undef, undef
- %v2f64 = fcmp olt <2 x double> undef, undef
- %v4f64 = fcmp olt <4 x double> undef, undef
- %v2f16 = fcmp olt <2 x half> undef, undef
- %v4f16 = fcmp olt <4 x half> undef, undef
- %v8f16 = fcmp olt <8 x half> undef, undef
- %v16f16 = fcmp olt <16 x half> undef, undef
+ %v2f32 = fcmp olt <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp olt <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp olt <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp olt <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp olt <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp olt <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp olt <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp olt <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp olt <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_olt_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_olt_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp olt <2 x bfloat> undef, undef
- %v4bf16 = fcmp olt <4 x bfloat> undef, undef
- %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+ %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ole(i32 %arg) {
; CHECK-LABEL: 'fcmp_ole'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ole <2 x float> undef, undef
- %v4f32 = fcmp ole <4 x float> undef, undef
- %v8f32 = fcmp ole <8 x float> undef, undef
- %v2f64 = fcmp ole <2 x double> undef, undef
- %v4f64 = fcmp ole <4 x double> undef, undef
- %v2f16 = fcmp ole <2 x half> undef, undef
- %v4f16 = fcmp ole <4 x half> undef, undef
- %v8f16 = fcmp ole <8 x half> undef, undef
- %v16f16 = fcmp ole <16 x half> undef, undef
+ %v2f32 = fcmp ole <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ole <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ole <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ole <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ole <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ole <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ole <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ole <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ole <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ole_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ole_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ole <2 x bfloat> undef, undef
- %v4bf16 = fcmp ole <4 x bfloat> undef, undef
- %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_one(i32 %arg) {
; CHECK-LABEL: 'fcmp_one'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp one <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp one <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp one <2 x float> undef, undef
- %v4f32 = fcmp one <4 x float> undef, undef
- %v8f32 = fcmp one <8 x float> undef, undef
- %v2f64 = fcmp one <2 x double> undef, undef
- %v4f64 = fcmp one <4 x double> undef, undef
- %v2f16 = fcmp one <2 x half> undef, undef
- %v4f16 = fcmp one <4 x half> undef, undef
- %v8f16 = fcmp one <8 x half> undef, undef
- %v16f16 = fcmp one <16 x half> undef, undef
+ %v2f32 = fcmp one <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp one <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp one <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp one <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp one <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp one <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp one <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp one <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp one <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_one_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_one_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp one <2 x bfloat> undef, undef
- %v4bf16 = fcmp one <4 x bfloat> undef, undef
- %v8bf16 = fcmp one <8 x bfloat> undef, undef
+ %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ord(i32 %arg) {
; CHECK-LABEL: 'fcmp_ord'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ord <2 x float> undef, undef
- %v4f32 = fcmp ord <4 x float> undef, undef
- %v8f32 = fcmp ord <8 x float> undef, undef
- %v2f64 = fcmp ord <2 x double> undef, undef
- %v4f64 = fcmp ord <4 x double> undef, undef
- %v2f16 = fcmp ord <2 x half> undef, undef
- %v4f16 = fcmp ord <4 x half> undef, undef
- %v8f16 = fcmp ord <8 x half> undef, undef
- %v16f16 = fcmp ord <16 x half> undef, undef
+ %v2f32 = fcmp ord <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ord <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ord <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ord <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ord <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ord <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ord <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ord <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ord <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ord_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ord_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ord <2 x bfloat> undef, undef
- %v4bf16 = fcmp ord <4 x bfloat> undef, undef
- %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ueq(i32 %arg) {
; CHECK-LABEL: 'fcmp_ueq'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ueq <2 x float> undef, undef
- %v4f32 = fcmp ueq <4 x float> undef, undef
- %v8f32 = fcmp ueq <8 x float> undef, undef
- %v2f64 = fcmp ueq <2 x double> undef, undef
- %v4f64 = fcmp ueq <4 x double> undef, undef
- %v2f16 = fcmp ueq <2 x half> undef, undef
- %v4f16 = fcmp ueq <4 x half> undef, undef
- %v8f16 = fcmp ueq <8 x half> undef, undef
- %v16f16 = fcmp ueq <16 x half> undef, undef
+ %v2f32 = fcmp ueq <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ueq <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ueq <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ueq <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ueq <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ueq <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ueq <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ueq <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ueq <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ueq_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ueq_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
- %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
- %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ugt(i32 %arg) {
; CHECK-LABEL: 'fcmp_ugt'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ugt <2 x float> undef, undef
- %v4f32 = fcmp ugt <4 x float> undef, undef
- %v8f32 = fcmp ugt <8 x float> undef, undef
- %v2f64 = fcmp ugt <2 x double> undef, undef
- %v4f64 = fcmp ugt <4 x double> undef, undef
- %v2f16 = fcmp ugt <2 x half> undef, undef
- %v4f16 = fcmp ugt <4 x half> undef, undef
- %v8f16 = fcmp ugt <8 x half> undef, undef
- %v16f16 = fcmp ugt <16 x half> undef, undef
+ %v2f32 = fcmp ugt <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ugt <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ugt <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ugt <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ugt <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ugt <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ugt <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ugt <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ugt <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ugt_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ugt_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
- %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
- %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_uge(i32 %arg) {
; CHECK-LABEL: 'fcmp_uge'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp uge <2 x float> undef, undef
- %v4f32 = fcmp uge <4 x float> undef, undef
- %v8f32 = fcmp uge <8 x float> undef, undef
- %v2f64 = fcmp uge <2 x double> undef, undef
- %v4f64 = fcmp uge <4 x double> undef, undef
- %v2f16 = fcmp uge <2 x half> undef, undef
- %v4f16 = fcmp uge <4 x half> undef, undef
- %v8f16 = fcmp uge <8 x half> undef, undef
- %v16f16 = fcmp uge <16 x half> undef, undef
+ %v2f32 = fcmp uge <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp uge <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp uge <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp uge <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp uge <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp uge <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp uge <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp uge <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp uge <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_uge_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_uge_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp uge <2 x bfloat> undef, undef
- %v4bf16 = fcmp uge <4 x bfloat> undef, undef
- %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+ %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ult(i32 %arg) {
; CHECK-LABEL: 'fcmp_ult'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ult <2 x float> undef, undef
- %v4f32 = fcmp ult <4 x float> undef, undef
- %v8f32 = fcmp ult <8 x float> undef, undef
- %v2f64 = fcmp ult <2 x double> undef, undef
- %v4f64 = fcmp ult <4 x double> undef, undef
- %v2f16 = fcmp ult <2 x half> undef, undef
- %v4f16 = fcmp ult <4 x half> undef, undef
- %v8f16 = fcmp ult <8 x half> undef, undef
- %v16f16 = fcmp ult <16 x half> undef, undef
+ %v2f32 = fcmp ult <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ult <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ult <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ult <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ult <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ult <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ult <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ult <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ult <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ult_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ult_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ult <2 x bfloat> undef, undef
- %v4bf16 = fcmp ult <4 x bfloat> undef, undef
- %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+ %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef
ret void
}
define void @fcmp_ule(i32 %arg) {
; CHECK-LABEL: 'fcmp_ule'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp ule <2 x float> undef, undef
- %v4f32 = fcmp ule <4 x float> undef, undef
- %v8f32 = fcmp ule <8 x float> undef, undef
- %v2f64 = fcmp ule <2 x double> undef, undef
- %v4f64 = fcmp ule <4 x double> undef, undef
- %v2f16 = fcmp ule <2 x half> undef, undef
- %v4f16 = fcmp ule <4 x half> undef, undef
- %v8f16 = fcmp ule <8 x half> undef, undef
- %v16f16 = fcmp ule <16 x half> undef, undef
+ %v2f32 = fcmp ule <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp ule <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp ule <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp ule <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp ule <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp ule <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp ule <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp ule <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp ule <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_ule_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_ule_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp ule <2 x bfloat> undef, undef
- %v4bf16 = fcmp ule <4 x bfloat> undef, undef
- %v8bf16 = fcmp ule <8 x bfloat> undef, undef
- %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+ %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef
+ %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef
ret void
}
define void @fcmp_une(i32 %arg) {
; CHECK-LABEL: 'fcmp_une'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp une <2 x float> undef, undef
- %v4f32 = fcmp une <4 x float> undef, undef
- %v8f32 = fcmp une <8 x float> undef, undef
- %v2f64 = fcmp une <2 x double> undef, undef
- %v4f64 = fcmp une <4 x double> undef, undef
- %v2f16 = fcmp une <2 x half> undef, undef
- %v4f16 = fcmp une <4 x half> undef, undef
- %v8f16 = fcmp une <8 x half> undef, undef
- %v16f16 = fcmp une <16 x half> undef, undef
+ %v2f32 = fcmp une <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp une <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp une <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp une <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp une <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp une <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp une <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp une <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp une <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_une_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_une_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp une <2 x bfloat> undef, undef
- %v4bf16 = fcmp une <4 x bfloat> undef, undef
- %v8bf16 = fcmp une <8 x bfloat> undef, undef
- %v16bf16 = fcmp une <16 x bfloat> undef, undef
+ %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef
+ %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef
ret void
}
define void @fcmp_uno(i32 %arg) {
; CHECK-LABEL: 'fcmp_uno'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp uno <2 x float> undef, undef
- %v4f32 = fcmp uno <4 x float> undef, undef
- %v8f32 = fcmp uno <8 x float> undef, undef
- %v2f64 = fcmp uno <2 x double> undef, undef
- %v4f64 = fcmp uno <4 x double> undef, undef
- %v2f16 = fcmp uno <2 x half> undef, undef
- %v4f16 = fcmp uno <4 x half> undef, undef
- %v8f16 = fcmp uno <8 x half> undef, undef
- %v16f16 = fcmp uno <16 x half> undef, undef
+ %v2f32 = fcmp uno <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp uno <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp uno <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp uno <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp uno <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp uno <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp uno <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp uno <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp uno <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_uno_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_uno_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp uno <2 x bfloat> undef, undef
- %v4bf16 = fcmp uno <4 x bfloat> undef, undef
- %v8bf16 = fcmp uno <8 x bfloat> undef, undef
- %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+ %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef
+ %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef
ret void
}
define void @fcmp_true(i32 %arg) {
; CHECK-LABEL: 'fcmp_true'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp true <2 x float> undef, undef
- %v4f32 = fcmp true <4 x float> undef, undef
- %v8f32 = fcmp true <8 x float> undef, undef
- %v2f64 = fcmp true <2 x double> undef, undef
- %v4f64 = fcmp true <4 x double> undef, undef
- %v2f16 = fcmp true <2 x half> undef, undef
- %v4f16 = fcmp true <4 x half> undef, undef
- %v8f16 = fcmp true <8 x half> undef, undef
- %v16f16 = fcmp true <16 x half> undef, undef
+ %v2f32 = fcmp true <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp true <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp true <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp true <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp true <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp true <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp true <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp true <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp true <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_true_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_true_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp true <2 x bfloat> undef, undef
- %v4bf16 = fcmp true <4 x bfloat> undef, undef
- %v8bf16 = fcmp true <8 x bfloat> undef, undef
- %v16bf16 = fcmp true <16 x bfloat> undef, undef
+ %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef
+ %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef
ret void
}
define void @fcmp_false(i32 %arg) {
; CHECK-LABEL: 'fcmp_false'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <vscale x 2 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <vscale x 4 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <vscale x 8 x float> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <vscale x 2 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <vscale x 4 x double> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <vscale x 2 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <vscale x 4 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp false <vscale x 8 x half> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <vscale x 16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2f32 = fcmp false <2 x float> undef, undef
- %v4f32 = fcmp false <4 x float> undef, undef
- %v8f32 = fcmp false <8 x float> undef, undef
- %v2f64 = fcmp false <2 x double> undef, undef
- %v4f64 = fcmp false <4 x double> undef, undef
- %v2f16 = fcmp false <2 x half> undef, undef
- %v4f16 = fcmp false <4 x half> undef, undef
- %v8f16 = fcmp false <8 x half> undef, undef
- %v16f16 = fcmp false <16 x half> undef, undef
+ %v2f32 = fcmp false <vscale x 2 x float> undef, undef
+ %v4f32 = fcmp false <vscale x 4 x float> undef, undef
+ %v8f32 = fcmp false <vscale x 8 x float> undef, undef
+ %v2f64 = fcmp false <vscale x 2 x double> undef, undef
+ %v4f64 = fcmp false <vscale x 4 x double> undef, undef
+ %v2f16 = fcmp false <vscale x 2 x half> undef, undef
+ %v4f16 = fcmp false <vscale x 4 x half> undef, undef
+ %v8f16 = fcmp false <vscale x 8 x half> undef, undef
+ %v16f16 = fcmp false <vscale x 16 x half> undef, undef
ret void
}
define void @fcmp_false_bfloat(i32 %arg) {
; CHECK-LABEL: 'fcmp_false_bfloat'
-; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %v2bf16 = fcmp false <2 x bfloat> undef, undef
- %v4bf16 = fcmp false <4 x bfloat> undef, undef
- %v8bf16 = fcmp false <8 x bfloat> undef, undef
- %v16bf16 = fcmp false <16 x bfloat> undef, undef
+ %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef
+ %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef
+ %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef
+ %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef
ret void
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
index 117315c..805b3713 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
@@ -31,3 +31,24 @@ define void @sve_fpext() {
ret void
}
+
+define void @sve_fpext_bf16() {
+; CHECK-LABEL: 'sve_fpext_bf16'
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
+ %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
+ %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
+
+ %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
+ %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
+ %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
+
+ ret void
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
index a17c6ce..bb31ebf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBF16
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
@@ -31,3 +32,27 @@ define void @sve_fptruncs() {
ret void
}
+
+define void @sve_fptruncs_bf16() {
+; CHECK-LABEL: 'sve_fptruncs_bf16'
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
+ %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
+ %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
+
+ %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
+ %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
+ %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BF16: {{.*}}
+; CHECK-NOBF16: {{.*}}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index ee485e2..7e8d957 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -449,33 +449,33 @@ define void @vector_reverse() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; TYPE_BASED_ONLY-LABEL: 'vector_reverse'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
index 9c2d6bc..adb1a72 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -29,119 +26,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @sadd(i32 %arg) {
-; V8M-RECIP-LABEL: 'sadd'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'sadd'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'sadd'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'sadd'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'sadd'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'sadd'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'sadd'
+; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'sadd'
+; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'sadd'
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:126 CodeSize:92 Lat:126 SizeLat:126 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:218 CodeSize:145 Lat:218 SizeLat:218 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:434 CodeSize:289 Lat:434 SizeLat:434 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:78 CodeSize:71 Lat:78 SizeLat:78 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:154 CodeSize:141 Lat:154 SizeLat:154 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:142 CodeSize:135 Lat:142 SizeLat:142 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:284 CodeSize:270 Lat:284 SizeLat:284 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:272 CodeSize:264 Lat:272 SizeLat:272 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:544 CodeSize:528 Lat:544 SizeLat:544 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -187,119 +127,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @uadd(i32 %arg) {
-; V8M-RECIP-LABEL: 'uadd'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'uadd'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'uadd'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'uadd'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'uadd'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'uadd'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'uadd'
+; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'uadd'
+; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'uadd'
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:54 Lat:72 SizeLat:72 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:144 CodeSize:108 Lat:144 SizeLat:144 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:288 CodeSize:216 Lat:288 SizeLat:288 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:36 Lat:40 SizeLat:40 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:80 CodeSize:72 Lat:80 SizeLat:80 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:144 CodeSize:136 Lat:144 SizeLat:144 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:136 CodeSize:132 Lat:136 SizeLat:136 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:272 CodeSize:264 Lat:272 SizeLat:272 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -345,119 +228,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @ssub(i32 %arg) {
-; V8M-RECIP-LABEL: 'ssub'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'ssub'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'ssub'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'ssub'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'ssub'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'ssub'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'ssub'
+; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'ssub'
+; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'ssub'
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:126 CodeSize:92 Lat:126 SizeLat:126 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:218 CodeSize:145 Lat:218 SizeLat:218 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:434 CodeSize:289 Lat:434 SizeLat:434 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:78 CodeSize:71 Lat:78 SizeLat:78 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:154 CodeSize:141 Lat:154 SizeLat:154 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:142 CodeSize:135 Lat:142 SizeLat:142 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:284 CodeSize:270 Lat:284 SizeLat:284 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:272 CodeSize:264 Lat:272 SizeLat:272 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:544 CodeSize:528 Lat:544 SizeLat:544 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -503,119 +329,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @usub(i32 %arg) {
-; V8M-RECIP-LABEL: 'usub'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'usub'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'usub'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'usub'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'usub'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'usub'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'usub'
+; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'usub'
+; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'usub'
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:54 Lat:72 SizeLat:72 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:144 CodeSize:108 Lat:144 SizeLat:144 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:288 CodeSize:216 Lat:288 SizeLat:288 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:36 Lat:40 SizeLat:40 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:80 CodeSize:72 Lat:80 SizeLat:80 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:144 CodeSize:136 Lat:144 SizeLat:144 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:136 CodeSize:132 Lat:136 SizeLat:136 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:272 CodeSize:264 Lat:272 SizeLat:272 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -661,119 +430,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @smul(i32 %arg) {
-; V8M-RECIP-LABEL: 'smul'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'smul'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'smul'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1016 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2032 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2832 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 880 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'smul'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'smul'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'smul'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 652 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 538 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'smul'
+; V8M-NEXT: Cost Model: Found costs of 15 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:82 CodeSize:30 Lat:30 SizeLat:30 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:164 CodeSize:56 Lat:56 SizeLat:56 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:328 CodeSize:108 Lat:108 SizeLat:108 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 8 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:68 CodeSize:30 Lat:30 SizeLat:30 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:136 CodeSize:58 Lat:58 SizeLat:58 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:272 CodeSize:114 Lat:114 SizeLat:114 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:56 CodeSize:42 Lat:42 SizeLat:42 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:112 CodeSize:82 Lat:82 SizeLat:82 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:224 CodeSize:162 Lat:162 SizeLat:162 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:112 CodeSize:82 Lat:82 SizeLat:82 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:224 CodeSize:162 Lat:162 SizeLat:162 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:448 CodeSize:322 Lat:322 SizeLat:322 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'smul'
+; NEON-NEXT: Cost Model: Found costs of RThru:15 CodeSize:8 Lat:8 SizeLat:8 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:83 CodeSize:12 Lat:12 SizeLat:12 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:162 CodeSize:13 Lat:13 SizeLat:13 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:320 CodeSize:15 Lat:15 SizeLat:15 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:10 Lat:10 SizeLat:10 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:38 CodeSize:11 Lat:11 SizeLat:11 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:72 CodeSize:13 Lat:13 SizeLat:13 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:23 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:46 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:92 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:23 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:46 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:92 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'smul'
+; MVE-NEXT: Cost Model: Found costs of 15 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:508 CodeSize:74 Lat:108 SizeLat:108 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:1016 CodeSize:144 Lat:212 SizeLat:212 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2032 CodeSize:284 Lat:420 SizeLat:420 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 8 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:284 CodeSize:150 Lat:152 SizeLat:152 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:872 CodeSize:328 Lat:332 SizeLat:332 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2832 CodeSize:652 Lat:660 SizeLat:660 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:40 Lat:46 SizeLat:46 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:232 CodeSize:142 Lat:154 SizeLat:154 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:624 CodeSize:282 Lat:306 SizeLat:306 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:96 CodeSize:72 Lat:78 SizeLat:78 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:360 CodeSize:270 Lat:282 SizeLat:282 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:880 CodeSize:538 Lat:562 SizeLat:562 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -819,119 +531,62 @@ declare {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x
declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
define i32 @umul(i32 %arg) {
-; V8M-RECIP-LABEL: 'umul'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'umul'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'umul'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 944 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1888 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 484 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1288 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'umul'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'umul'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'umul'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 534 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'umul'
+; V8M-NEXT: Cost Model: Found costs of 13 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:78 CodeSize:26 Lat:26 SizeLat:26 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:156 CodeSize:48 Lat:48 SizeLat:48 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:312 CodeSize:92 Lat:92 SizeLat:92 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of 7 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:26 Lat:26 SizeLat:26 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:50 Lat:50 SizeLat:50 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:98 Lat:98 SizeLat:98 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:192 CodeSize:130 Lat:130 SizeLat:130 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:192 CodeSize:130 Lat:130 SizeLat:130 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:384 CodeSize:258 Lat:258 SizeLat:258 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'umul'
+; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:77 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:154 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:308 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:19 CodeSize:9 Lat:9 SizeLat:9 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:34 CodeSize:10 Lat:10 SizeLat:10 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:12 Lat:12 SizeLat:12 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:42 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:84 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:42 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:84 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'umul'
+; MVE-NEXT: Cost Model: Found costs of 13 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:472 CodeSize:38 Lat:72 SizeLat:72 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:944 CodeSize:72 Lat:140 SizeLat:140 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:1888 CodeSize:140 Lat:276 SizeLat:276 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 7 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:186 CodeSize:149 Lat:150 SizeLat:150 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:484 CodeSize:326 Lat:328 SizeLat:328 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:1288 CodeSize:648 Lat:652 SizeLat:652 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:62 CodeSize:39 Lat:44 SizeLat:44 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:228 CodeSize:140 Lat:150 SizeLat:150 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:616 CodeSize:278 Lat:298 SizeLat:298 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:94 CodeSize:71 Lat:76 SizeLat:76 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:356 CodeSize:268 Lat:278 SizeLat:278 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:872 CodeSize:534 Lat:554 SizeLat:554 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
%V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
index 34397bd..5d227a8 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -35,155 +32,80 @@ declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
define i32 @add(i32 %arg) {
-; V8M-RECIP-LABEL: 'add'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'add'
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:30 CodeSize:34 Lat:30 SizeLat:30 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:60 CodeSize:68 Lat:60 SizeLat:60 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:120 CodeSize:136 Lat:120 SizeLat:120 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:512 CodeSize:640 Lat:512 SizeLat:512 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
;
-; NEON-RECIP-LABEL: 'add'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-LABEL: 'add'
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
-; MVE-RECIP-LABEL: 'add'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'add'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'add'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'add'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'add'
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:166 CodeSize:112 Lat:166 SizeLat:166 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:298 CodeSize:185 Lat:298 SizeLat:298 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:594 CodeSize:369 Lat:594 SizeLat:594 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -241,155 +163,80 @@ declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
define i32 @sub(i32 %arg) {
-; V8M-RECIP-LABEL: 'sub'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'sub'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'sub'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'sub'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'sub'
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:30 CodeSize:34 Lat:30 SizeLat:30 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:60 CodeSize:68 Lat:60 SizeLat:60 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:120 CodeSize:136 Lat:120 SizeLat:120 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:512 CodeSize:640 Lat:512 SizeLat:512 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
;
-; NEON-SIZE-LABEL: 'sub'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-LABEL: 'sub'
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
-; MVE-SIZE-LABEL: 'sub'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'sub'
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:166 CodeSize:112 Lat:166 SizeLat:166 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:298 CodeSize:185 Lat:298 SizeLat:298 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:594 CodeSize:369 Lat:594 SizeLat:594 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of 1 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:100 CodeSize:70 Lat:100 SizeLat:100 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
index 28e4860..c843331 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
@@ -1,10 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -35,155 +32,80 @@ declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
define i32 @add(i32 %arg) {
-; V8M-RECIP-LABEL: 'add'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'add'
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:18 Lat:16 SizeLat:16 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:36 Lat:32 SizeLat:32 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:72 Lat:64 SizeLat:64 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
;
-; NEON-RECIP-LABEL: 'add'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-LABEL: 'add'
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 5 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
-; MVE-RECIP-LABEL: 'add'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'add'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'add'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'add'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'add'
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -241,155 +163,80 @@ declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
define i32 @sub(i32 %arg) {
-; V8M-RECIP-LABEL: 'sub'
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'sub'
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-RECIP-LABEL: 'sub'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; V8M-SIZE-LABEL: 'sub'
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'sub'
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:18 Lat:16 SizeLat:16 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:36 Lat:32 SizeLat:32 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:72 Lat:64 SizeLat:64 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:128 SizeLat:128 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:256 SizeLat:256 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
;
-; NEON-SIZE-LABEL: 'sub'
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-LABEL: 'sub'
+; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-NEXT: Cost Model: Found costs of 5 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 4 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of 8 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
-; MVE-SIZE-LABEL: 'sub'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'sub'
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index 6c974af..673bf38 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -1,1579 +1,2413 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d < %s | FileCheck %s --check-prefixes=CHECK,NO-ZFHMIN
; Check that we don't crash querying costs when vectors are not enabled.
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64
define void @fadd() {
; CHECK-LABEL: 'fadd'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fadd <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fadd <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fadd <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fadd <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fadd <vscale x 16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fadd <1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fadd <vscale x 1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd <vscale x 8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fadd <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fadd <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fadd <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fadd <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fadd <vscale x 16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fadd <1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fadd <vscale x 1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd <vscale x 8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = fadd bfloat undef, undef
- %F32 = fadd float undef, undef
- %F64 = fadd double undef, undef
-
- %V1BF16 = fadd <1 x bfloat> undef, undef
- %V2BF16 = fadd <2 x bfloat> undef, undef
- %V4BF16 = fadd <4 x bfloat> undef, undef
- %V8BF16 = fadd <8 x bfloat> undef, undef
- %V16BF16 = fadd <16 x bfloat> undef, undef
-
- %NXV1BF16 = fadd <vscale x 1 x bfloat> undef, undef
- %NXV2BF16 = fadd <vscale x 2 x bfloat> undef, undef
- %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef
- %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef
- %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef
-
- %V1F32 = fadd <1 x float> undef, undef
- %V2F32 = fadd <2 x float> undef, undef
- %V4F32 = fadd <4 x float> undef, undef
- %V8F32 = fadd <8 x float> undef, undef
- %V16F32 = fadd <16 x float> undef, undef
-
- %NXV1F32 = fadd <vscale x 1 x float> undef, undef
- %NXV2F32 = fadd <vscale x 2 x float> undef, undef
- %NXV4F32 = fadd <vscale x 4 x float> undef, undef
- %NXV8F32 = fadd <vscale x 8 x float> undef, undef
- %NXV16F32 = fadd <vscale x 16 x float> undef, undef
-
- %V1F64 = fadd <1 x double> undef, undef
- %V2F64 = fadd <2 x double> undef, undef
- %V4F64 = fadd <4 x double> undef, undef
- %V8F64 = fadd <8 x double> undef, undef
-
- %NXV1F64 = fadd <vscale x 1 x double> undef, undef
- %NXV2F64 = fadd <vscale x 2 x double> undef, undef
- %NXV4F64 = fadd <vscale x 4 x double> undef, undef
- %NXV8F64 = fadd <vscale x 8 x double> undef, undef
-
- %V1F16_VP = call <1 x bfloat> @llvm.vp.fadd.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x bfloat> @llvm.vp.fadd.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x bfloat> @llvm.vp.fadd.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x bfloat> @llvm.vp.fadd.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x bfloat> @llvm.vp.fadd.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.fadd.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %F32 = fadd float poison, poison
+ %F64 = fadd double poison, poison
+
+ %V1F32 = fadd <1 x float> poison, poison
+ %V2F32 = fadd <2 x float> poison, poison
+ %V4F32 = fadd <4 x float> poison, poison
+ %V8F32 = fadd <8 x float> poison, poison
+ %V16F32 = fadd <16 x float> poison, poison
+
+ %NXV1F32 = fadd <vscale x 1 x float> poison, poison
+ %NXV2F32 = fadd <vscale x 2 x float> poison, poison
+ %NXV4F32 = fadd <vscale x 4 x float> poison, poison
+ %NXV8F32 = fadd <vscale x 8 x float> poison, poison
+ %NXV16F32 = fadd <vscale x 16 x float> poison, poison
+
+ %V1F64 = fadd <1 x double> poison, poison
+ %V2F64 = fadd <2 x double> poison, poison
+ %V4F64 = fadd <4 x double> poison, poison
+ %V8F64 = fadd <8 x double> poison, poison
+
+ %NXV1F64 = fadd <vscale x 1 x double> poison, poison
+ %NXV2F64 = fadd <vscale x 2 x double> poison, poison
+ %NXV4F64 = fadd <vscale x 4 x double> poison, poison
+ %NXV8F64 = fadd <vscale x 8 x double> poison, poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.fadd(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.fadd(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.fadd(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.fadd(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.fadd(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.fadd(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.fadd(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.fadd(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.fadd(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fadd(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fadd(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fadd(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fadd(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fadd(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @fadd_bf16() {
+; ZVFH-LABEL: 'fadd_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fadd_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fadd_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = fadd bfloat poison, poison
+
+ %V1BF16 = fadd <1 x bfloat> poison, poison
+ %V2BF16 = fadd <2 x bfloat> poison, poison
+ %V4BF16 = fadd <4 x bfloat> poison, poison
+ %V8BF16 = fadd <8 x bfloat> poison, poison
+ %V16BF16 = fadd <16 x bfloat> poison, poison
+
+ %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+ %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+ %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+ %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+ %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fadd_f16() {
; ZVFH-LABEL: 'fadd_f16'
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'fadd_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fadd half undef, undef
-
- %V1F16 = fadd <1 x half> undef, undef
- %V2F16 = fadd <2 x half> undef, undef
- %V4F16 = fadd <4 x half> undef, undef
- %V8F16 = fadd <8 x half> undef, undef
- %V16F16 = fadd <16 x half> undef, undef
- %V32F16 = fadd <32 x half> undef, undef
-
- %NXV1F16 = fadd <vscale x 1 x half> undef, undef
- %NXV2F16 = fadd <vscale x 2 x half> undef, undef
- %NXV4F16 = fadd <vscale x 4 x half> undef, undef
- %NXV8F16 = fadd <vscale x 8 x half> undef, undef
- %NXV16F16 = fadd <vscale x 16 x half> undef, undef
- %NXV32F16 = fadd <vscale x 32 x half> undef, undef
-
- %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; NO-ZFHMIN-LABEL: 'fadd_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fadd <2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fadd <4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fadd <8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fadd <16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fadd <32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = fadd half poison, poison
+
+ %V1F16 = fadd <1 x half> poison, poison
+ %V2F16 = fadd <2 x half> poison, poison
+ %V4F16 = fadd <4 x half> poison, poison
+ %V8F16 = fadd <8 x half> poison, poison
+ %V16F16 = fadd <16 x half> poison, poison
+ %V32F16 = fadd <32 x half> poison, poison
+
+ %NXV1F16 = fadd <vscale x 1 x half> poison, poison
+ %NXV2F16 = fadd <vscale x 2 x half> poison, poison
+ %NXV4F16 = fadd <vscale x 4 x half> poison, poison
+ %NXV8F16 = fadd <vscale x 8 x half> poison, poison
+ %NXV16F16 = fadd <vscale x 16 x half> poison, poison
+ %NXV32F16 = fadd <vscale x 32 x half> poison, poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.fadd(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.fadd(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.fadd(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.fadd(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.fadd(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fsub() {
; CHECK-LABEL: 'fsub'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fsub <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fsub <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fsub <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fsub <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fsub <vscale x 16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fsub <1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fsub <vscale x 1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fsub <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fsub <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fsub <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fsub <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fsub <vscale x 16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fsub <1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fsub <vscale x 1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fsub half undef, undef
- %F32 = fsub float undef, undef
- %F64 = fsub double undef, undef
-
- %V1BF16 = fsub <1 x bfloat> undef, undef
- %V2BF16 = fsub <2 x bfloat> undef, undef
- %V4BF16 = fsub <4 x bfloat> undef, undef
- %V8BF16 = fsub <8 x bfloat> undef, undef
- %V16BF16 = fsub <16 x bfloat> undef, undef
-
- %NXV1BF16 = fsub <vscale x 1 x bfloat> undef, undef
- %NXV2BF16 = fsub <vscale x 2 x bfloat> undef, undef
- %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef
- %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef
- %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef
-
- %V1F32 = fsub <1 x float> undef, undef
- %V2F32 = fsub <2 x float> undef, undef
- %V4F32 = fsub <4 x float> undef, undef
- %V8F32 = fsub <8 x float> undef, undef
- %V16F32 = fsub <16 x float> undef, undef
-
- %NXV1F32 = fsub <vscale x 1 x float> undef, undef
- %NXV2F32 = fsub <vscale x 2 x float> undef, undef
- %NXV4F32 = fsub <vscale x 4 x float> undef, undef
- %NXV8F32 = fsub <vscale x 8 x float> undef, undef
- %NXV16F32 = fsub <vscale x 16 x float> undef, undef
-
- %V1F64 = fsub <1 x double> undef, undef
- %V2F64 = fsub <2 x double> undef, undef
- %V4F64 = fsub <4 x double> undef, undef
- %V8F64 = fsub <8 x double> undef, undef
-
- %NXV1F64 = fsub <vscale x 1 x double> undef, undef
- %NXV2F64 = fsub <vscale x 2 x double> undef, undef
- %NXV4F64 = fsub <vscale x 4 x double> undef, undef
- %NXV8F64 = fsub <vscale x 8 x double> undef, undef
-
- %V1F16_VP = call <1 x bfloat> @llvm.vp.fsub.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x bfloat> @llvm.vp.fsub.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x bfloat> @llvm.vp.fsub.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x bfloat> @llvm.vp.fsub.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x bfloat> @llvm.vp.fsub.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.fsub.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %F32 = fsub float poison, poison
+ %F64 = fsub double poison, poison
+
+ %V1F32 = fsub <1 x float> poison, poison
+ %V2F32 = fsub <2 x float> poison, poison
+ %V4F32 = fsub <4 x float> poison, poison
+ %V8F32 = fsub <8 x float> poison, poison
+ %V16F32 = fsub <16 x float> poison, poison
+
+ %NXV1F32 = fsub <vscale x 1 x float> poison, poison
+ %NXV2F32 = fsub <vscale x 2 x float> poison, poison
+ %NXV4F32 = fsub <vscale x 4 x float> poison, poison
+ %NXV8F32 = fsub <vscale x 8 x float> poison, poison
+ %NXV16F32 = fsub <vscale x 16 x float> poison, poison
+
+ %V1F64 = fsub <1 x double> poison, poison
+ %V2F64 = fsub <2 x double> poison, poison
+ %V4F64 = fsub <4 x double> poison, poison
+ %V8F64 = fsub <8 x double> poison, poison
+
+ %NXV1F64 = fsub <vscale x 1 x double> poison, poison
+ %NXV2F64 = fsub <vscale x 2 x double> poison, poison
+ %NXV4F64 = fsub <vscale x 4 x double> poison, poison
+ %NXV8F64 = fsub <vscale x 8 x double> poison, poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.fsub(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.fsub(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.fsub(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.fsub(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.fsub(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.fsub(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.fsub(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.fsub(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.fsub(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fsub(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fsub(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fsub(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fsub(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fsub(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @fsub_bf16() {
+; ZVFH-LABEL: 'fsub_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fsub_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fsub_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = fsub bfloat poison, poison
+
+ %V1BF16 = fsub <1 x bfloat> poison, poison
+ %V2BF16 = fsub <2 x bfloat> poison, poison
+ %V4BF16 = fsub <4 x bfloat> poison, poison
+ %V8BF16 = fsub <8 x bfloat> poison, poison
+ %V16BF16 = fsub <16 x bfloat> poison, poison
+ %V32BF16 = fsub <32 x bfloat> poison, poison
+
+ %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+ %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+ %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+ %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+ %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+ %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fsub_f16() {
; ZVFH-LABEL: 'fsub_f16'
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'fsub_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fsub half undef, undef
-
- %V1F16 = fsub <1 x half> undef, undef
- %V2F16 = fsub <2 x half> undef, undef
- %V4F16 = fsub <4 x half> undef, undef
- %V8F16 = fsub <8 x half> undef, undef
- %V16F16 = fsub <16 x half> undef, undef
- %V32F16 = fsub <32 x half> undef, undef
-
- %NXV1F16 = fsub <vscale x 1 x half> undef, undef
- %NXV2F16 = fsub <vscale x 2 x half> undef, undef
- %NXV4F16 = fsub <vscale x 4 x half> undef, undef
- %NXV8F16 = fsub <vscale x 8 x half> undef, undef
- %NXV16F16 = fsub <vscale x 16 x half> undef, undef
- %NXV32F16 = fsub <vscale x 32 x half> undef, undef
-
- %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; NO-ZFHMIN-LABEL: 'fsub_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fsub <2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fsub <4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fsub <8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fsub <16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fsub <32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = fsub half poison, poison
+
+ %V1F16 = fsub <1 x half> poison, poison
+ %V2F16 = fsub <2 x half> poison, poison
+ %V4F16 = fsub <4 x half> poison, poison
+ %V8F16 = fsub <8 x half> poison, poison
+ %V16F16 = fsub <16 x half> poison, poison
+ %V32F16 = fsub <32 x half> poison, poison
+
+ %NXV1F16 = fsub <vscale x 1 x half> poison, poison
+ %NXV2F16 = fsub <vscale x 2 x half> poison, poison
+ %NXV4F16 = fsub <vscale x 4 x half> poison, poison
+ %NXV8F16 = fsub <vscale x 8 x half> poison, poison
+ %NXV16F16 = fsub <vscale x 16 x half> poison, poison
+ %NXV32F16 = fsub <vscale x 32 x half> poison, poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.fsub(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.fsub(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.fsub(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.fsub(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.fsub(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fmul() {
; CHECK-LABEL: 'fmul'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fmul <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fmul <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fmul <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fmul <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fmul <vscale x 16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fmul <1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fmul <vscale x 1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fmul <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fmul <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fmul <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fmul <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fmul <vscale x 16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fmul <1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fmul <vscale x 1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = fmul bfloat undef, undef
- %F32 = fmul float undef, undef
- %F64 = fmul double undef, undef
-
- %V1BF16 = fmul <1 x bfloat> undef, undef
- %V2BF16 = fmul <2 x bfloat> undef, undef
- %V4BF16 = fmul <4 x bfloat> undef, undef
- %V8BF16 = fmul <8 x bfloat> undef, undef
- %V16BF16 = fmul <16 x bfloat> undef, undef
-
- %NXV1BF16 = fmul <vscale x 1 x bfloat> undef, undef
- %NXV2BF16 = fmul <vscale x 2 x bfloat> undef, undef
- %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef
- %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef
- %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef
-
- %V1F32 = fmul <1 x float> undef, undef
- %V2F32 = fmul <2 x float> undef, undef
- %V4F32 = fmul <4 x float> undef, undef
- %V8F32 = fmul <8 x float> undef, undef
- %V16F32 = fmul <16 x float> undef, undef
-
- %NXV1F32 = fmul <vscale x 1 x float> undef, undef
- %NXV2F32 = fmul <vscale x 2 x float> undef, undef
- %NXV4F32 = fmul <vscale x 4 x float> undef, undef
- %NXV8F32 = fmul <vscale x 8 x float> undef, undef
- %NXV16F32 = fmul <vscale x 16 x float> undef, undef
-
- %V1F64 = fmul <1 x double> undef, undef
- %V2F64 = fmul <2 x double> undef, undef
- %V4F64 = fmul <4 x double> undef, undef
- %V8F64 = fmul <8 x double> undef, undef
-
- %NXV1F64 = fmul <vscale x 1 x double> undef, undef
- %NXV2F64 = fmul <vscale x 2 x double> undef, undef
- %NXV4F64 = fmul <vscale x 4 x double> undef, undef
- %NXV8F64 = fmul <vscale x 8 x double> undef, undef
-
- %V1F16_VP = call <1 x bfloat> @llvm.vp.fmul.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x bfloat> @llvm.vp.fmul.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x bfloat> @llvm.vp.fmul.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x bfloat> @llvm.vp.fmul.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x bfloat> @llvm.vp.fmul.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.fmul.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %F32 = fmul float poison, poison
+ %F64 = fmul double poison, poison
+
+ %V1F32 = fmul <1 x float> poison, poison
+ %V2F32 = fmul <2 x float> poison, poison
+ %V4F32 = fmul <4 x float> poison, poison
+ %V8F32 = fmul <8 x float> poison, poison
+ %V16F32 = fmul <16 x float> poison, poison
+
+ %NXV1F32 = fmul <vscale x 1 x float> poison, poison
+ %NXV2F32 = fmul <vscale x 2 x float> poison, poison
+ %NXV4F32 = fmul <vscale x 4 x float> poison, poison
+ %NXV8F32 = fmul <vscale x 8 x float> poison, poison
+ %NXV16F32 = fmul <vscale x 16 x float> poison, poison
+
+ %V1F64 = fmul <1 x double> poison, poison
+ %V2F64 = fmul <2 x double> poison, poison
+ %V4F64 = fmul <4 x double> poison, poison
+ %V8F64 = fmul <8 x double> poison, poison
+
+ %NXV1F64 = fmul <vscale x 1 x double> poison, poison
+ %NXV2F64 = fmul <vscale x 2 x double> poison, poison
+ %NXV4F64 = fmul <vscale x 4 x double> poison, poison
+ %NXV8F64 = fmul <vscale x 8 x double> poison, poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.fmul(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.fmul(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.fmul(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.fmul(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.fmul(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.fmul(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.fmul(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.fmul(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.fmul(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fmul(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fmul(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fmul(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fmul(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fmul(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @fmul_bf16() {
+; ZVFH-LABEL: 'fmul_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmul_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmul_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = fmul bfloat poison, poison
+
+ %V1BF16 = fmul <1 x bfloat> poison, poison
+ %V2BF16 = fmul <2 x bfloat> poison, poison
+ %V4BF16 = fmul <4 x bfloat> poison, poison
+ %V8BF16 = fmul <8 x bfloat> poison, poison
+ %V16BF16 = fmul <16 x bfloat> poison, poison
+ %V32BF16 = fmul <32 x bfloat> poison, poison
+
+ %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+ %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+ %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+ %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+ %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+ %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fmul_f16() {
; ZVFH-LABEL: 'fmul_f16'
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'fmul_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fmul half undef, undef
-
- %V1F16 = fmul <1 x half> undef, undef
- %V2F16 = fmul <2 x half> undef, undef
- %V4F16 = fmul <4 x half> undef, undef
- %V8F16 = fmul <8 x half> undef, undef
- %V16F16 = fmul <16 x half> undef, undef
- %V32F16 = fmul <32 x half> undef, undef
-
- %NXV1F16 = fmul <vscale x 1 x half> undef, undef
- %NXV2F16 = fmul <vscale x 2 x half> undef, undef
- %NXV4F16 = fmul <vscale x 4 x half> undef, undef
- %NXV8F16 = fmul <vscale x 8 x half> undef, undef
- %NXV16F16 = fmul <vscale x 16 x half> undef, undef
- %NXV32F16 = fmul <vscale x 32 x half> undef, undef
-
- %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; NO-ZFHMIN-LABEL: 'fmul_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fmul <2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fmul <4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fmul <8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fmul <16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fmul <32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = fmul half poison, poison
+
+ %V1F16 = fmul <1 x half> poison, poison
+ %V2F16 = fmul <2 x half> poison, poison
+ %V4F16 = fmul <4 x half> poison, poison
+ %V8F16 = fmul <8 x half> poison, poison
+ %V16F16 = fmul <16 x half> poison, poison
+ %V32F16 = fmul <32 x half> poison, poison
+
+ %NXV1F16 = fmul <vscale x 1 x half> poison, poison
+ %NXV2F16 = fmul <vscale x 2 x half> poison, poison
+ %NXV4F16 = fmul <vscale x 4 x half> poison, poison
+ %NXV8F16 = fmul <vscale x 8 x half> poison, poison
+ %NXV16F16 = fmul <vscale x 16 x half> poison, poison
+ %NXV32F16 = fmul <vscale x 32 x half> poison, poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.fmul(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.fmul(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.fmul(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.fmul(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.fmul(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fdiv() {
; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fdiv <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fdiv <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fdiv <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fdiv <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fdiv <vscale x 16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fdiv <1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fdiv <vscale x 1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fdiv <16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fdiv <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fdiv <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fdiv <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fdiv <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fdiv <vscale x 16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fdiv <1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fdiv <vscale x 1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = fdiv bfloat undef, undef
- %F32 = fdiv float undef, undef
- %F64 = fdiv double undef, undef
-
- %V1BF16 = fdiv <1 x bfloat> undef, undef
- %V2BF16 = fdiv <2 x bfloat> undef, undef
- %V4BF16 = fdiv <4 x bfloat> undef, undef
- %V8BF16 = fdiv <8 x bfloat> undef, undef
- %V16BF16 = fdiv <16 x bfloat> undef, undef
-
- %NXV1BF16 = fdiv <vscale x 1 x bfloat> undef, undef
- %NXV2BF16 = fdiv <vscale x 2 x bfloat> undef, undef
- %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef
- %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef
- %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef
-
- %V1F32 = fdiv <1 x float> undef, undef
- %V2F32 = fdiv <2 x float> undef, undef
- %V4F32 = fdiv <4 x float> undef, undef
- %V8F32 = fdiv <8 x float> undef, undef
- %V16F32 = fdiv <16 x float> undef, undef
-
- %NXV1F32 = fdiv <vscale x 1 x float> undef, undef
- %NXV2F32 = fdiv <vscale x 2 x float> undef, undef
- %NXV4F32 = fdiv <vscale x 4 x float> undef, undef
- %NXV8F32 = fdiv <vscale x 8 x float> undef, undef
- %NXV16F32 = fdiv <vscale x 16 x float> undef, undef
-
- %V1F64 = fdiv <1 x double> undef, undef
- %V2F64 = fdiv <2 x double> undef, undef
- %V4F64 = fdiv <4 x double> undef, undef
- %V8F64 = fdiv <8 x double> undef, undef
-
- %NXV1F64 = fdiv <vscale x 1 x double> undef, undef
- %NXV2F64 = fdiv <vscale x 2 x double> undef, undef
- %NXV4F64 = fdiv <vscale x 4 x double> undef, undef
- %NXV8F64 = fdiv <vscale x 8 x double> undef, undef
-
- %V1F16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.fdiv.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %F32 = fdiv float poison, poison
+ %F64 = fdiv double poison, poison
+
+ %V1F32 = fdiv <1 x float> poison, poison
+ %V2F32 = fdiv <2 x float> poison, poison
+ %V4F32 = fdiv <4 x float> poison, poison
+ %V8F32 = fdiv <8 x float> poison, poison
+ %V16F32 = fdiv <16 x float> poison, poison
+
+ %NXV1F32 = fdiv <vscale x 1 x float> poison, poison
+ %NXV2F32 = fdiv <vscale x 2 x float> poison, poison
+ %NXV4F32 = fdiv <vscale x 4 x float> poison, poison
+ %NXV8F32 = fdiv <vscale x 8 x float> poison, poison
+ %NXV16F32 = fdiv <vscale x 16 x float> poison, poison
+
+ %V1F64 = fdiv <1 x double> poison, poison
+ %V2F64 = fdiv <2 x double> poison, poison
+ %V4F64 = fdiv <4 x double> poison, poison
+ %V8F64 = fdiv <8 x double> poison, poison
+
+ %NXV1F64 = fdiv <vscale x 1 x double> poison, poison
+ %NXV2F64 = fdiv <vscale x 2 x double> poison, poison
+ %NXV4F64 = fdiv <vscale x 4 x double> poison, poison
+ %NXV8F64 = fdiv <vscale x 8 x double> poison, poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.fdiv(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.fdiv(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.fdiv(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.fdiv(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.fdiv(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.fdiv(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.fdiv(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.fdiv(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.fdiv(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fdiv(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fdiv(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fdiv(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fdiv(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fdiv(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @fdiv_bf16() {
+; ZVFH-LABEL: 'fdiv_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fdiv_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fdiv_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = fdiv bfloat poison, poison
+
+ %V1BF16 = fdiv <1 x bfloat> poison, poison
+ %V2BF16 = fdiv <2 x bfloat> poison, poison
+ %V4BF16 = fdiv <4 x bfloat> poison, poison
+ %V8BF16 = fdiv <8 x bfloat> poison, poison
+ %V16BF16 = fdiv <16 x bfloat> poison, poison
+ %V32BF16 = fdiv <32 x bfloat> poison, poison
+
+ %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+ %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+ %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+ %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+ %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+ %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fdiv_f16() {
; ZVFH-LABEL: 'fdiv_f16'
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'fdiv_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fdiv half undef, undef
-
- %V1F16 = fdiv <1 x half> undef, undef
- %V2F16 = fdiv <2 x half> undef, undef
- %V4F16 = fdiv <4 x half> undef, undef
- %V8F16 = fdiv <8 x half> undef, undef
- %V16F16 = fdiv <16 x half> undef, undef
- %V32F16 = fdiv <32 x half> undef, undef
-
- %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
- %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
- %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
- %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
- %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
- %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
-
- %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> undef, <4 x half> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> undef, <8 x half> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> undef, <16 x half> undef, <16 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; NO-ZFHMIN-LABEL: 'fdiv_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fdiv <2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fdiv <4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fdiv <8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fdiv <16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fdiv <32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = fdiv half poison, poison
+
+ %V1F16 = fdiv <1 x half> poison, poison
+ %V2F16 = fdiv <2 x half> poison, poison
+ %V4F16 = fdiv <4 x half> poison, poison
+ %V8F16 = fdiv <8 x half> poison, poison
+ %V16F16 = fdiv <16 x half> poison, poison
+ %V32F16 = fdiv <32 x half> poison, poison
+
+ %NXV1F16 = fdiv <vscale x 1 x half> poison, poison
+ %NXV2F16 = fdiv <vscale x 2 x half> poison, poison
+ %NXV4F16 = fdiv <vscale x 4 x half> poison, poison
+ %NXV8F16 = fdiv <vscale x 8 x half> poison, poison
+ %NXV16F16 = fdiv <vscale x 16 x half> poison, poison
+ %NXV32F16 = fdiv <vscale x 32 x half> poison, poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.fdiv(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.fdiv(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.fdiv(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.fdiv(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.fdiv(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @frem() {
; CHECK-LABEL: 'frem'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem <vscale x 4 x float> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem <vscale x 4 x float> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> poison, poison
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> poison, poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = frem bfloat undef, undef
- %F32 = frem float undef, undef
- %F64 = frem double undef, undef
-
- %V1BF16 = frem <1 x bfloat> undef, undef
- %V2BF16 = frem <2 x bfloat> undef, undef
- %V4BF16 = frem <4 x bfloat> undef, undef
- %V8BF16 = frem <8 x bfloat> undef, undef
- %V16BF16 = frem <16 x bfloat> undef, undef
-
- %NXV1BF16 = frem <vscale x 1 x bfloat> undef, undef
- %NXV2BF16 = frem <vscale x 2 x bfloat> undef, undef
- %NXV4BF16 = frem <vscale x 4 x bfloat> undef, undef
- %NXV8BF16 = frem <vscale x 8 x bfloat> undef, undef
- %NXV16BF16 = frem <vscale x 16 x bfloat> undef, undef
-
- %V1F32 = frem <1 x float> undef, undef
- %V2F32 = frem <2 x float> undef, undef
- %V4F32 = frem <4 x float> undef, undef
- %V8F32 = frem <8 x float> undef, undef
- %V16F32 = frem <16 x float> undef, undef
-
- %NXV1F32 = frem <vscale x 1 x float> undef, undef
- %NXV2F32 = frem <vscale x 2 x float> undef, undef
- %NXV4F32 = frem <vscale x 4 x float> undef, undef
- %NXV8F32 = frem <vscale x 8 x float> undef, undef
- %NXV16F32 = frem <vscale x 16 x float> undef, undef
-
- %V1F64 = frem <1 x double> undef, undef
- %V2F64 = frem <2 x double> undef, undef
- %V4F64 = frem <4 x double> undef, undef
- %V8F64 = frem <8 x double> undef, undef
-
- %NXV1F64 = frem <vscale x 1 x double> undef, undef
- %NXV2F64 = frem <vscale x 2 x double> undef, undef
- %NXV4F64 = frem <vscale x 4 x double> undef, undef
- %NXV8F64 = frem <vscale x 8 x double> undef, undef
-
- %V1F16_VP = call <1 x bfloat> @llvm.vp.frem.v1f16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2F16_VP = call <2 x bfloat> @llvm.vp.frem.v2f16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4F16_VP = call <4 x bfloat> @llvm.vp.frem.v4f16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8F16_VP = call <8 x bfloat> @llvm.vp.frem.v8f16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16F16_VP = call <16 x bfloat> @llvm.vp.frem.v16f16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> undef, <1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> undef, <1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
-
- %NXV1F16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %F32 = frem float poison, poison
+ %F64 = frem double poison, poison
+
+ %V1F32 = frem <1 x float> poison, poison
+ %V2F32 = frem <2 x float> poison, poison
+ %V4F32 = frem <4 x float> poison, poison
+ %V8F32 = frem <8 x float> poison, poison
+ %V16F32 = frem <16 x float> poison, poison
+
+ %NXV1F32 = frem <vscale x 1 x float> poison, poison
+ %NXV2F32 = frem <vscale x 2 x float> poison, poison
+ %NXV4F32 = frem <vscale x 4 x float> poison, poison
+ %NXV8F32 = frem <vscale x 8 x float> poison, poison
+ %NXV16F32 = frem <vscale x 16 x float> poison, poison
+
+ %V1F64 = frem <1 x double> poison, poison
+ %V2F64 = frem <2 x double> poison, poison
+ %V4F64 = frem <4 x double> poison, poison
+ %V8F64 = frem <8 x double> poison, poison
+
+ %NXV1F64 = frem <vscale x 1 x double> poison, poison
+ %NXV2F64 = frem <vscale x 2 x double> poison, poison
+ %NXV4F64 = frem <vscale x 4 x double> poison, poison
+ %NXV8F64 = frem <vscale x 8 x double> poison, poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.frem(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.frem(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.frem(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.frem(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.frem(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.frem(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.frem(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.frem(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.frem(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.frem(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.frem(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.frem(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.frem(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.frem(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @frem_bf16() {
+; ZVFH-LABEL: 'frem_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'frem_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'frem_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = frem bfloat poison, poison
+
+ %V1BF16 = frem <1 x bfloat> poison, poison
+ %V2BF16 = frem <2 x bfloat> poison, poison
+ %V4BF16 = frem <4 x bfloat> poison, poison
+ %V8BF16 = frem <8 x bfloat> poison, poison
+ %V16BF16 = frem <16 x bfloat> poison, poison
+ %V32BF16 = frem <32 x bfloat> poison, poison
+
+ %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+ %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+ %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+ %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+ %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+ %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @frem_f16() {
-; CHECK-LABEL: 'frem_f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'frem_f16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'frem_f16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = frem half undef, undef
-
- %V1F16 = frem <1 x half> undef, undef
- %V2F16 = frem <2 x half> undef, undef
- %V4F16 = frem <4 x half> undef, undef
- %V8F16 = frem <8 x half> undef, undef
- %V16F16 = frem <16 x half> undef, undef
- %V32F16 = frem <32 x half> undef, undef
-
- %NXV1F16 = frem <vscale x 1 x half> undef, undef
- %NXV2F16 = frem <vscale x 2 x half> undef, undef
- %NXV4F16 = frem <vscale x 4 x half> undef, undef
- %NXV8F16 = frem <vscale x 8 x half> undef, undef
- %NXV16F16 = frem <vscale x 16 x half> undef, undef
- %NXV32F16 = frem <vscale x 32 x half> undef, undef
+; NO-ZFHMIN-LABEL: 'frem_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = frem half poison, poison
+
+ %V1F16 = frem <1 x half> poison, poison
+ %V2F16 = frem <2 x half> poison, poison
+ %V4F16 = frem <4 x half> poison, poison
+ %V8F16 = frem <8 x half> poison, poison
+ %V16F16 = frem <16 x half> poison, poison
+ %V32F16 = frem <32 x half> poison, poison
+
+ %NXV1F16 = frem <vscale x 1 x half> poison, poison
+ %NXV2F16 = frem <vscale x 2 x half> poison, poison
+ %NXV4F16 = frem <vscale x 4 x half> poison, poison
+ %NXV8F16 = frem <vscale x 8 x half> poison, poison
+ %NXV16F16 = frem <vscale x 16 x half> poison, poison
+ %NXV32F16 = frem <vscale x 32 x half> poison, poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.frem(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.frem(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.frem(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.frem(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.frem(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fneg() {
; CHECK-LABEL: 'fneg'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg half undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fneg <vscale x 1 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fneg <vscale x 2 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fneg <vscale x 4 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fneg <vscale x 8 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fneg <vscale x 16 x float> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fneg <1 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fneg <vscale x 1 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fneg <vscale x 1 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fneg <vscale x 2 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fneg <vscale x 4 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fneg <vscale x 8 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fneg <vscale x 16 x float> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fneg <1 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fneg <vscale x 1 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> poison, <1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = fneg half undef
- %F32 = fneg float undef
- %F64 = fneg double undef
-
- %V1BF16 = fneg <1 x bfloat> undef
- %V2BF16 = fneg <2 x bfloat> undef
- %V4BF16 = fneg <4 x bfloat> undef
- %V8BF16 = fneg <8 x bfloat> undef
- %V16BF16 = fneg <16 x bfloat> undef
-
- %NXV1BF16 = fneg <vscale x 1 x bfloat> undef
- %NXV2BF16 = fneg <vscale x 2 x bfloat> undef
- %NXV4BF16 = fneg <vscale x 4 x bfloat> undef
- %NXV8BF16 = fneg <vscale x 8 x bfloat> undef
- %NXV16BF16 = fneg <vscale x 16 x bfloat> undef
-
- %V1F32 = fneg <1 x float> undef
- %V2F32 = fneg <2 x float> undef
- %V4F32 = fneg <4 x float> undef
- %V8F32 = fneg <8 x float> undef
- %V16F32 = fneg <16 x float> undef
-
- %NXV1F32 = fneg <vscale x 1 x float> undef
- %NXV2F32 = fneg <vscale x 2 x float> undef
- %NXV4F32 = fneg <vscale x 4 x float> undef
- %NXV8F32 = fneg <vscale x 8 x float> undef
- %NXV16F32 = fneg <vscale x 16 x float> undef
-
- %V1F64 = fneg <1 x double> undef
- %V2F64 = fneg <2 x double> undef
- %V4F64 = fneg <4 x double> undef
- %V8F64 = fneg <8 x double> undef
-
- %NXV1F64 = fneg <vscale x 1 x double> undef
- %NXV2F64 = fneg <vscale x 2 x double> undef
- %NXV4F64 = fneg <vscale x 4 x double> undef
- %NXV8F64 = fneg <vscale x 8 x double> undef
-
- %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1f16(<1 x bfloat> undef, <1 x i1> undef, i32 undef)
- %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2f16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
- %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4f16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
- %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8f16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
- %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16f16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
-
- %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
- %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
- %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
- %V8F32_VP = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
- %V16F32_VP = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-
- %V1F64_VP = call <1 x double> @llvm.vp.fneg.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
- %V2F64_VP = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
- %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
- %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
- %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-
- %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1f16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2f16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4f16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8f16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16f16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-
- %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
- %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
- %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
- %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
- %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+ %F32 = fneg float poison
+ %F64 = fneg double poison
+
+ %V1F32 = fneg <1 x float> poison
+ %V2F32 = fneg <2 x float> poison
+ %V4F32 = fneg <4 x float> poison
+ %V8F32 = fneg <8 x float> poison
+ %V16F32 = fneg <16 x float> poison
+
+ %NXV1F32 = fneg <vscale x 1 x float> poison
+ %NXV2F32 = fneg <vscale x 2 x float> poison
+ %NXV4F32 = fneg <vscale x 4 x float> poison
+ %NXV8F32 = fneg <vscale x 8 x float> poison
+ %NXV16F32 = fneg <vscale x 16 x float> poison
+
+ %V1F64 = fneg <1 x double> poison
+ %V2F64 = fneg <2 x double> poison
+ %V4F64 = fneg <4 x double> poison
+ %V8F64 = fneg <8 x double> poison
+
+ %NXV1F64 = fneg <vscale x 1 x double> poison
+ %NXV2F64 = fneg <vscale x 2 x double> poison
+ %NXV4F64 = fneg <vscale x 4 x double> poison
+ %NXV8F64 = fneg <vscale x 8 x double> poison
+
+ %V1F32_VP = call <1 x float> @llvm.vp.fneg(<1 x float> poison, <1 x i1> poison, i32 poison)
+ %V2F32_VP = call <2 x float> @llvm.vp.fneg(<2 x float> poison, <2 x i1> poison, i32 poison)
+ %V4F32_VP = call <4 x float> @llvm.vp.fneg(<4 x float> poison, <4 x i1> poison, i32 poison)
+ %V8F32_VP = call <8 x float> @llvm.vp.fneg(<8 x float> poison, <8 x i1> poison, i32 poison)
+ %V16F32_VP = call <16 x float> @llvm.vp.fneg(<16 x float> poison, <16 x i1> poison, i32 poison)
+
+ %V1F64_VP = call <1 x double> @llvm.vp.fneg(<1 x double> poison, <1 x i1> poison, i32 poison)
+ %V2F64_VP = call <2 x double> @llvm.vp.fneg(<2 x double> poison, <2 x i1> poison, i32 poison)
+ %V4F64_VP = call <4 x double> @llvm.vp.fneg(<4 x double> poison, <4 x i1> poison, i32 poison)
+ %V8F64_VP = call <8 x double> @llvm.vp.fneg(<8 x double> poison, <8 x i1> poison, i32 poison)
+ %V16F64_VP = call <16 x double> @llvm.vp.fneg(<16 x double> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fneg(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
+
+ ret void
+}
+
+define void @fneg_bf16() {
+; ZVFH-LABEL: 'fneg_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fneg_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fneg_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = fneg bfloat poison
+
+ %V1BF16 = fneg <1 x bfloat> poison
+ %V2BF16 = fneg <2 x bfloat> poison
+ %V4BF16 = fneg <4 x bfloat> poison
+ %V8BF16 = fneg <8 x bfloat> poison
+ %V16BF16 = fneg <16 x bfloat> poison
+ %V32BF16 = fneg <32 x bfloat> poison
+
+ %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+ %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+ %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+ %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+ %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+ %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+
+ %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+ %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+ %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+ %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+ %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fneg_f16() {
-; CHECK-LABEL: 'fneg_f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fneg_f16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fneg_f16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = fneg half undef
-
- %V1F16 = fneg <1 x half> undef
- %V2F16 = fneg <2 x half> undef
- %V4F16 = fneg <4 x half> undef
- %V8F16 = fneg <8 x half> undef
- %V16F16 = fneg <16 x half> undef
- %V32F16 = fneg <32 x half> undef
-
- %NXV1F16 = fneg <vscale x 1 x half> undef
- %NXV2F16 = fneg <vscale x 2 x half> undef
- %NXV4F16 = fneg <vscale x 4 x half> undef
- %NXV8F16 = fneg <vscale x 8 x half> undef
- %NXV16F16 = fneg <vscale x 16 x half> undef
- %NXV32F16 = fneg <vscale x 32 x half> undef
+; NO-ZFHMIN-LABEL: 'fneg_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fneg <2 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fneg <4 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fneg <8 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fneg <16 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fneg <32 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = fneg half poison
+
+ %V1F16 = fneg <1 x half> poison
+ %V2F16 = fneg <2 x half> poison
+ %V4F16 = fneg <4 x half> poison
+ %V8F16 = fneg <8 x half> poison
+ %V16F16 = fneg <16 x half> poison
+ %V32F16 = fneg <32 x half> poison
+
+ %NXV1F16 = fneg <vscale x 1 x half> poison
+ %NXV2F16 = fneg <vscale x 2 x half> poison
+ %NXV4F16 = fneg <vscale x 4 x half> poison
+ %NXV8F16 = fneg <vscale x 8 x half> poison
+ %NXV16F16 = fneg <vscale x 16 x half> poison
+ %NXV32F16 = fneg <vscale x 32 x half> poison
+
+ %V1F16_VP = call <1 x half> @llvm.vp.fneg(<1 x half> poison, <1 x i1> poison, i32 poison)
+ %V2F16_VP = call <2 x half> @llvm.vp.fneg(<2 x half> poison, <2 x i1> poison, i32 poison)
+ %V4F16_VP = call <4 x half> @llvm.vp.fneg(<4 x half> poison, <4 x i1> poison, i32 poison)
+ %V8F16_VP = call <8 x half> @llvm.vp.fneg(<8 x half> poison, <8 x i1> poison, i32 poison)
+ %V16F16_VP = call <16 x half> @llvm.vp.fneg(<16 x half> poison, <16 x i1> poison, i32 poison)
+
+ %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+ %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+ %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+ %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+ %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
ret void
}
define void @fcopysign() {
; CHECK-LABEL: 'fcopysign'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> undef, <1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> poison, <1 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> poison, <1 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
- %F32 = call float @llvm.copysign.f32(float undef, float undef)
- %F64 = call double @llvm.copysign.f64(double undef, double undef)
-
- %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef)
- %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
- %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
- %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
- %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef)
-
- %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
- %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
- %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
- %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
- %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
-
- %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef)
- %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
- %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
- %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
- %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
-
- %NXV1F32 = call <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
- %NXV2F32 = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
- %NXV4F32 = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
- %NXV8F32 = call <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
- %NXV16F32 = call <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
-
- %V1F64 = call <1 x double> @llvm.copysign.v1f64(<1 x double> undef, <1 x double> undef)
- %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
- %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
- %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-
- %NXV1F64 = call <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
- %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
- %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
- %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ %F32 = call float @llvm.copysign.f32(float poison, float poison)
+ %F64 = call double @llvm.copysign.f64(double poison, double poison)
+
+ %V1F32 = call <1 x float> @llvm.copysign(<1 x float> poison, <1 x float> poison)
+ %V2F32 = call <2 x float> @llvm.copysign(<2 x float> poison, <2 x float> poison)
+ %V4F32 = call <4 x float> @llvm.copysign(<4 x float> poison, <4 x float> poison)
+ %V8F32 = call <8 x float> @llvm.copysign(<8 x float> poison, <8 x float> poison)
+ %V16F32 = call <16 x float> @llvm.copysign(<16 x float> poison, <16 x float> poison)
+
+ %NXV1F32 = call <vscale x 1 x float> @llvm.copysign(<vscale x 1 x float> poison, <vscale x 1 x float> poison)
+ %NXV2F32 = call <vscale x 2 x float> @llvm.copysign(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ %NXV4F32 = call <vscale x 4 x float> @llvm.copysign(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ %NXV8F32 = call <vscale x 8 x float> @llvm.copysign(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ %NXV16F32 = call <vscale x 16 x float> @llvm.copysign(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+
+ %V1F64 = call <1 x double> @llvm.copysign(<1 x double> poison, <1 x double> poison)
+ %V2F64 = call <2 x double> @llvm.copysign(<2 x double> poison, <2 x double> poison)
+ %V4F64 = call <4 x double> @llvm.copysign(<4 x double> poison, <4 x double> poison)
+ %V8F64 = call <8 x double> @llvm.copysign(<8 x double> poison, <8 x double> poison)
+
+ %NXV1F64 = call <vscale x 1 x double> @llvm.copysign(<vscale x 1 x double> poison, <vscale x 1 x double> poison)
+ %NXV2F64 = call <vscale x 2 x double> @llvm.copysign(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ %NXV4F64 = call <vscale x 4 x double> @llvm.copysign(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ %NXV8F64 = call <vscale x 8 x double> @llvm.copysign(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+
+ ret void
+}
+
+define void @fcopysign_bf16() {
+; ZVFH-LABEL: 'fcopysign_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fcopysign_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fcopysign_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = call bfloat @llvm.copysign(bfloat poison, bfloat poison)
+
+ %V1BF16 = call <1 x bfloat> @llvm.copysign(<1 x bfloat> poison, <1 x bfloat> poison)
+ %V2BF16 = call <2 x bfloat> @llvm.copysign(<2 x bfloat> poison, <2 x bfloat> poison)
+ %V4BF16 = call <4 x bfloat> @llvm.copysign(<4 x bfloat> poison, <4 x bfloat> poison)
+ %V8BF16 = call <8 x bfloat> @llvm.copysign(<8 x bfloat> poison, <8 x bfloat> poison)
+ %V16BF16 = call <16 x bfloat> @llvm.copysign(<16 x bfloat> poison, <16 x bfloat> poison)
+ %V32BF16 = call <32 x bfloat> @llvm.copysign(<32 x bfloat> poison, <32 x bfloat> poison)
+
+ %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+ %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
ret void
}
define void @fcopysign_f16() {
; ZVFH-LABEL: 'fcopysign_f16'
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'fcopysign_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = call half @llvm.copysign.f16(half undef, half undef)
-
- %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
- %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
- %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
- %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
- %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
- %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
-
- %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
- %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
- %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
- %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
- %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
- %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; NO-ZFHMIN-LABEL: 'fcopysign_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %F16 = call half @llvm.copysign(half poison, half poison)
+
+ %V1F16 = call <1 x half> @llvm.copysign(<1 x half> poison, <1 x half> poison)
+ %V2F16 = call <2 x half> @llvm.copysign(<2 x half> poison, <2 x half> poison)
+ %V4F16 = call <4 x half> @llvm.copysign(<4 x half> poison, <4 x half> poison)
+ %V8F16 = call <8 x half> @llvm.copysign(<8 x half> poison, <8 x half> poison)
+ %V16F16 = call <16 x half> @llvm.copysign(<16 x half> poison, <16 x half> poison)
+ %V32F16 = call <32 x half> @llvm.copysign(<32 x half> poison, <32 x half> poison)
+
+ %NXV1F16 = call <vscale x 1 x half> @llvm.copysign(<vscale x 1 x half> poison, <vscale x 1 x half> poison)
+ %NXV2F16 = call <vscale x 2 x half> @llvm.copysign(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ %NXV4F16 = call <vscale x 4 x half> @llvm.copysign(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ %NXV8F16 = call <vscale x 8 x half> @llvm.copysign(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ %NXV16F16 = call <vscale x 16 x half> @llvm.copysign(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ %NXV32F16 = call <vscale x 32 x half> @llvm.copysign(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
ret void
}
define void @fma() {
; CHECK-LABEL: 'fma'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> undef, <1 x double> undef, <1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float poison, float poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double poison, double poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> poison, <1 x float> poison, <1 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> poison, <1 x double> poison, <1 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %BF16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
- %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
- %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
-
- %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> undef)
- %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
- %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
- %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)
- %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
-
- %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
- %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
- %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
- %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
- %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
-
- %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef)
- %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
- %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
- %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
- %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
-
- %NXV1F32 = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
- %NXV2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
- %NXV4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
- %NXV8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
- %NXV16F32 = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
-
- %V1F64 = call <1 x double> @llvm.fma.v1f64(<1 x double> undef, <1 x double> undef, <1 x double> undef)
- %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
- %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
- %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
-
- %NXV1F64 = call <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
- %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
- %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
- %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
+ %F32 = call float @llvm.fma(float poison, float poison, float poison)
+ %F64 = call double @llvm.fma(double poison, double poison, double poison)
+
+ %V1F32 = call <1 x float> @llvm.fma(<1 x float> poison, <1 x float> poison, <1 x float> poison)
+ %V2F32 = call <2 x float> @llvm.fma(<2 x float> poison, <2 x float> poison, <2 x float> poison)
+ %V4F32 = call <4 x float> @llvm.fma(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+ %V8F32 = call <8 x float> @llvm.fma(<8 x float> poison, <8 x float> poison, <8 x float> poison)
+ %V16F32 = call <16 x float> @llvm.fma(<16 x float> poison, <16 x float> poison, <16 x float> poison)
+
+ %NXV1F32 = call <vscale x 1 x float> @llvm.fma(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
+ %NXV2F32 = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ %NXV4F32 = call <vscale x 4 x float> @llvm.fma(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ %NXV8F32 = call <vscale x 8 x float> @llvm.fma(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ %NXV16F32 = call <vscale x 16 x float> @llvm.fma(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
+
+ %V1F64 = call <1 x double> @llvm.fma(<1 x double> poison, <1 x double> poison, <1 x double> poison)
+ %V2F64 = call <2 x double> @llvm.fma(<2 x double> poison, <2 x double> poison, <2 x double> poison)
+ %V4F64 = call <4 x double> @llvm.fma(<4 x double> poison, <4 x double> poison, <4 x double> poison)
+ %V8F64 = call <8 x double> @llvm.fma(<8 x double> poison, <8 x double> poison, <8 x double> poison)
+
+ %NXV1F64 = call <vscale x 1 x double> @llvm.fma(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
+ %NXV2F64 = call <vscale x 2 x double> @llvm.fma(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ %NXV4F64 = call <vscale x 4 x double> @llvm.fma(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ %NXV8F64 = call <vscale x 8 x double> @llvm.fma(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
+
+ ret void
+}
+
+define void @fma_bf16() {
+; ZVFH-LABEL: 'fma_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fma_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fma_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %BF16 = call bfloat @llvm.fma(bfloat poison, bfloat poison, bfloat poison)
+
+ %V1BF16 = call <1 x bfloat> @llvm.fma(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+ %V2BF16 = call <2 x bfloat> @llvm.fma(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+ %V4BF16 = call <4 x bfloat> @llvm.fma(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+ %V8BF16 = call <8 x bfloat> @llvm.fma(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+ %V16BF16 = call <16 x bfloat> @llvm.fma(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+ %V32BF16 = call <32 x bfloat> @llvm.fma(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+
+ %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+ %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
ret void
}
define void @fma_f16() {
-; CHECK-LABEL: 'fma_f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fma_f16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fma_f16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fma_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
-
- %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
- %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
- %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
- %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
- %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
- %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
-
- %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
- %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
- %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
- %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
- %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
- %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
+ %F16 = call half @llvm.fma(half poison, half poison, half poison)
+
+ %V1F16 = call <1 x half> @llvm.fma(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+ %V2F16 = call <2 x half> @llvm.fma(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+ %V4F16 = call <4 x half> @llvm.fma(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+ %V8F16 = call <8 x half> @llvm.fma(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+ %V16F16 = call <16 x half> @llvm.fma(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+ %V32F16 = call <32 x half> @llvm.fma(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+
+ %NXV1F16 = call <vscale x 1 x half> @llvm.fma(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+ %NXV2F16 = call <vscale x 2 x half> @llvm.fma(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ %NXV4F16 = call <vscale x 4 x half> @llvm.fma(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ %NXV8F16 = call <vscale x 8 x half> @llvm.fma(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ %NXV16F16 = call <vscale x 16 x half> @llvm.fma(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ %NXV32F16 = call <vscale x 32 x half> @llvm.fma(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
ret void
}
define void @fmuladd() {
; CHECK-LABEL: 'fmuladd'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat undef, bfloat undef, bfloat undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fmuladd.f32(float poison, float poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call double @llvm.fmuladd.f64(double poison, double poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> poison, <16 x double> poison, <16 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %15 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %20 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale x 16 x double> poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call bfloat @llvm.fmuladd.bf16(bfloat undef, bfloat undef, bfloat undef)
- call float @llvm.fmuladd.f32(float undef, float undef, float undef)
- call double @llvm.fmuladd.f64(double undef, double undef, double undef)
- call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
- call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
- call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)
- call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
- call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
- call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
- call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
- call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
- call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
- call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
- call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
- call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
- call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
- call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
- call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
- call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
- call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
- call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
- call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
- call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
- call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
- call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
- call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
- call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
- call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
- call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
- call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
+ call float @llvm.fmuladd(float poison, float poison, float poison)
+ call double @llvm.fmuladd(double poison, double poison, double poison)
+
+ call <2 x float> @llvm.fmuladd(<2 x float> poison, <2 x float> poison, <2 x float> poison)
+ call <4 x float> @llvm.fmuladd(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+ call <8 x float> @llvm.fmuladd(<8 x float> poison, <8 x float> poison, <8 x float> poison)
+ call <16 x float> @llvm.fmuladd(<16 x float> poison, <16 x float> poison, <16 x float> poison)
+
+ call <2 x double> @llvm.fmuladd(<2 x double> poison, <2 x double> poison, <2 x double> poison)
+ call <4 x double> @llvm.fmuladd(<4 x double> poison, <4 x double> poison, <4 x double> poison)
+ call <8 x double> @llvm.fmuladd(<8 x double> poison, <8 x double> poison, <8 x double> poison)
+ call <16 x double> @llvm.fmuladd(<16 x double> poison, <16 x double> poison, <16 x double> poison)
+
+ call <vscale x 1 x float> @llvm.fmuladd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
+ call <vscale x 2 x float> @llvm.fmuladd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ call <vscale x 4 x float> @llvm.fmuladd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ call <vscale x 8 x float> @llvm.fmuladd(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ call <vscale x 16 x float> @llvm.fmuladd(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
+
+ call <vscale x 1 x double> @llvm.fmuladd(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
+ call <vscale x 2 x double> @llvm.fmuladd(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ call <vscale x 4 x double> @llvm.fmuladd(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ call <vscale x 8 x double> @llvm.fmuladd(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
+ call <vscale x 16 x double> @llvm.fmuladd(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale x 16 x double> poison)
+
+ ret void
+}
+
+define void @fmuladd_bf16() {
+; ZVFH-LABEL: 'fmuladd_bf16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmuladd_bf16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmuladd_bf16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call bfloat @llvm.fmuladd(bfloat poison, bfloat poison, bfloat poison)
+
+ call <2 x bfloat> @llvm.fmuladd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+ call <4 x bfloat> @llvm.fmuladd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+ call <8 x bfloat> @llvm.fmuladd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+ call <16 x bfloat> @llvm.fmuladd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+
+ call <vscale x 1 x bfloat> @llvm.fmuladd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+ call <vscale x 2 x bfloat> @llvm.fmuladd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ call <vscale x 4 x bfloat> @llvm.fmuladd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ call <vscale x 8 x bfloat> @llvm.fmuladd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ call <vscale x 16 x bfloat> @llvm.fmuladd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
ret void
}
define void @fmuladd_f16() {
-; CHECK-LABEL: 'fmuladd_f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fmuladd_f16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmuladd_f16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmuladd_f16'
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call half @llvm.fmuladd.f16(half undef, half undef, half undef)
- call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
- call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
- call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
- call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
- call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
- call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
- call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
- call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
- call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+ call half @llvm.fmuladd(half poison, half poison, half poison)
+
+ call <2 x half> @llvm.fmuladd(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+ call <4 x half> @llvm.fmuladd(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+ call <8 x half> @llvm.fmuladd(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+ call <16 x half> @llvm.fmuladd(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+
+ call <vscale x 1 x half> @llvm.fmuladd(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+ call <vscale x 2 x half> @llvm.fmuladd(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ call <vscale x 4 x half> @llvm.fmuladd(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ call <vscale x 8 x half> @llvm.fmuladd(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ call <vscale x 16 x half> @llvm.fmuladd(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll
index 892277a..68c89c3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll
@@ -13,14 +13,14 @@ define void @fixed() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
index 40e27c3..e3305c0 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
@@ -81,49 +81,27 @@ define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %
}
define void @vector_reverse() {
-; ARGBASED-LABEL: 'vector_reverse'
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %reverse_nxv16i32 = call <vscale x 16 x i32> @llvm.vector.reverse.nxv16i32(<vscale x 16 x i32> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv32i32 = call <vscale x 32 x i32> @llvm.vector.reverse.nxv32i32(<vscale x 32 x i32> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %reverse_nxv8i64 = call <vscale x 8 x i64> @llvm.vector.reverse.nxv8i64(<vscale x 8 x i64> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv16i64 = call <vscale x 16 x i64> @llvm.vector.reverse.nxv16i64(<vscale x 16 x i64> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %reverse_nxv32i64 = call <vscale x 32 x i64> @llvm.vector.reverse.nxv32i64(<vscale x 32 x i64> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'vector_reverse'
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i32 = call <vscale x 16 x i32> @llvm.vector.reverse.nxv16i32(<vscale x 16 x i32> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv32i32 = call <vscale x 32 x i32> @llvm.vector.reverse.nxv32i32(<vscale x 32 x i32> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i64 = call <vscale x 8 x i64> @llvm.vector.reverse.nxv8i64(<vscale x 8 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i64 = call <vscale x 16 x i64> @llvm.vector.reverse.nxv16i64(<vscale x 16 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv32i64 = call <vscale x 32 x i64> @llvm.vector.reverse.nxv32i64(<vscale x 32 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-LABEL: 'vector_reverse'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %reverse_nxv16i32 = call <vscale x 16 x i32> @llvm.vector.reverse.nxv16i32(<vscale x 16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv32i32 = call <vscale x 32 x i32> @llvm.vector.reverse.nxv32i32(<vscale x 32 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %reverse_nxv8i64 = call <vscale x 8 x i64> @llvm.vector.reverse.nxv8i64(<vscale x 8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %reverse_nxv16i64 = call <vscale x 16 x i64> @llvm.vector.reverse.nxv16i64(<vscale x 16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %reverse_nxv32i64 = call <vscale x 32 x i64> @llvm.vector.reverse.nxv32i64(<vscale x 32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'vector_reverse'
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 4bb4818..71746ca 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -1873,155 +1873,80 @@ define void @is.fpclass() {
}
define void @reverse() {
-; ARGBASED-LABEL: 'reverse'
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
-; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'reverse'
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-LABEL: 'reverse'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.reverse.v2i8(<2 x i8> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.reverse.v4i8(<4 x i8> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.reverse.v8i8(<8 x i8> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.reverse.v2i16(<2 x i16> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.reverse.v4i16(<4 x i16> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.reverse.v16i16(<16 x i16> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.reverse.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.reverse.v16i32(<16 x i32> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.reverse.v4i64(<4 x i64> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.reverse.v8i64(<8 x i64> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.reverse.v16i64(<16 x i64> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.reverse.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.reverse.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.reverse.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call <2 x half> @llvm.experimental.vp.reverse.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = call <8 x half> @llvm.experimental.vp.reverse.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call <16 x half> @llvm.experimental.vp.reverse.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call <2 x float> @llvm.experimental.vp.reverse.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %31 = call <8 x float> @llvm.experimental.vp.reverse.v8f32(<8 x float> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %32 = call <16 x float> @llvm.experimental.vp.reverse.v16f32(<16 x float> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %34 = call <4 x double> @llvm.experimental.vp.reverse.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %35 = call <8 x double> @llvm.experimental.vp.reverse.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %36 = call <16 x double> @llvm.experimental.vp.reverse.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.reverse.nxv2i8(<vscale x 2 x i8> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.reverse.nxv4i8(<vscale x 4 x i8> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.reverse.nxv2i16(<vscale x 2 x i16> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.reverse.nxv16i64(<vscale x 16 x i64> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.reverse.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.reverse.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.reverse.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.reverse.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.reverse.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> poison, <2 x i1> poison, i32 poison)
call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> poison, <4 x i1> poison, i32 poison)
diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 485e7f6..c53a507 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_funnel_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_funnel_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512F-LABEL: 'splatvar_funnel_i64'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512BW-LABEL: 'splatvar_funnel_i64'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512DQ-LABEL: 'splatvar_funnel_i64'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i64'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_funnel_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i64'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -532,8 +532,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_funnel_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_funnel_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512F-LABEL: 'splatvar_funnel_i32'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -559,8 +559,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512BW-LABEL: 'splatvar_funnel_i32'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512DQ-LABEL: 'splatvar_funnel_i32'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i32'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_funnel_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i32'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
@@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
@@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_rotate_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_rotate_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512-LABEL: 'splatvar_rotate_i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_rotate_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_rotate_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_rotate_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512-LABEL: 'splatvar_rotate_i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_rotate_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2070,99 +2070,99 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSE-LABEL: 'splatvar_rotate_i16'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
@@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSE-LABEL: 'splatvar_rotate_i8'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:15 SizeLat:10 for: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:12 for: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index af9a91b..1990605 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -414,8 +414,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_funnel_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:13 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -423,8 +423,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_funnel_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:11 SizeLat:11 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:14 SizeLat:14 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:14 Lat:28 SizeLat:28 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -432,8 +432,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512F-LABEL: 'splatvar_funnel_i64'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -441,8 +441,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512BW-LABEL: 'splatvar_funnel_i64'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -450,8 +450,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512DQ-LABEL: 'splatvar_funnel_i64'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:13 SizeLat:9 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -459,8 +459,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i64'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -486,8 +486,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_funnel_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:21 Lat:23 SizeLat:30 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:42 Lat:46 SizeLat:60 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -495,8 +495,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i64'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
@@ -532,8 +532,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_funnel_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -541,8 +541,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_funnel_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -550,8 +550,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512F-LABEL: 'splatvar_funnel_i32'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -559,8 +559,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512BW-LABEL: 'splatvar_funnel_i32'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -568,8 +568,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512DQ-LABEL: 'splatvar_funnel_i32'
; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -577,8 +577,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i32'
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -604,8 +604,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_funnel_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:32 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:64 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -613,8 +613,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i32'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512)
@@ -631,108 +631,108 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:10 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:20 CodeSize:17 Lat:19 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:40 CodeSize:33 Lat:37 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:9 Lat:14 SizeLat:16 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:18 Lat:28 SizeLat:32 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:11 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:9 SizeLat:9 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:20 CodeSize:26 Lat:38 SizeLat:30 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:12 CodeSize:7 Lat:12 SizeLat:11 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:23 SizeLat:21 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:25 Lat:45 SizeLat:41 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:13 Lat:19 SizeLat:19 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:40 CodeSize:25 Lat:37 SizeLat:37 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:12 SizeLat:10 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:19 CodeSize:25 Lat:23 SizeLat:33 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:38 CodeSize:50 Lat:46 SizeLat:66 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
@@ -749,108 +749,108 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSSE3-LABEL: 'splatvar_funnel_i8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:29 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSSE3-NEXT: Cost Model: Found costs of RThru:48 CodeSize:35 Lat:57 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSSE3-NEXT: Cost Model: Found costs of RThru:96 CodeSize:69 Lat:113 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'splatvar_funnel_i8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SSE42-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_funnel_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:16 Lat:18 SizeLat:22 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:36 Lat:26 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:72 Lat:52 SizeLat:94 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_funnel_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:20 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:21 SizeLat:27 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:42 SizeLat:54 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_funnel_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_funnel_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_funnel_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:19 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:24 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:40 CodeSize:73 Lat:56 SizeLat:85 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_funnel_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_funnel_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:26 CodeSize:16 Lat:31 SizeLat:25 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:52 CodeSize:31 Lat:61 SizeLat:49 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:104 CodeSize:61 Lat:121 SizeLat:97 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_funnel_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:24 CodeSize:16 Lat:29 SizeLat:24 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:57 SizeLat:47 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:96 CodeSize:61 Lat:113 SizeLat:93 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_funnel_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:9 CodeSize:7 Lat:12 SizeLat:8 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:23 CodeSize:25 Lat:23 SizeLat:31 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:46 CodeSize:50 Lat:46 SizeLat:62 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_funnel_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:20 SizeLat:20 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:22 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:13 CodeSize:18 Lat:20 SizeLat:23 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
@@ -1934,8 +1934,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX1-LABEL: 'splatvar_rotate_i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:18 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:36 SizeLat:44 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1943,8 +1943,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX2-LABEL: 'splatvar_rotate_i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:11 SizeLat:10 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:22 SizeLat:20 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1952,8 +1952,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; AVX512-LABEL: 'splatvar_rotate_i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -1979,8 +1979,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
;
; XOP-LABEL: 'splatvar_rotate_i64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512)
@@ -2007,8 +2007,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX1-LABEL: 'splatvar_rotate_i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2016,8 +2016,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX2-LABEL: 'splatvar_rotate_i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2025,8 +2025,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; AVX512-LABEL: 'splatvar_rotate_i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2052,8 +2052,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
;
; XOP-LABEL: 'splatvar_rotate_i32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512)
@@ -2070,99 +2070,99 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16
define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) {
; SSE-LABEL: 'splatvar_rotate_i16'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:9 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:17 Lat:18 SizeLat:24 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:24 CodeSize:34 Lat:36 SizeLat:48 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:12 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:22 SizeLat:24 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:7 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:6 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i16'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:7 CodeSize:7 Lat:11 SizeLat:10 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:15 CodeSize:17 Lat:29 SizeLat:21 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i16'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i16'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i16'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:13 SizeLat:13 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:28 CodeSize:17 Lat:25 SizeLat:25 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 3 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i16'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512)
@@ -2179,99 +2179,99 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) {
; SSE-LABEL: 'splatvar_rotate_i8'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SSE-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SSE-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'splatvar_rotate_i8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:15 SizeLat:19 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:28 Lat:21 SizeLat:38 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX1-NEXT: Cost Model: Found costs of RThru:40 CodeSize:56 Lat:42 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'splatvar_rotate_i8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:23 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:32 Lat:36 SizeLat:46 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'splatvar_rotate_i8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512F-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512F-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'splatvar_rotate_i8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512DQ-LABEL: 'splatvar_rotate_i8'
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512DQ-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512DQ-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:9 CodeSize:13 Lat:17 SizeLat:18 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:11 CodeSize:16 Lat:18 SizeLat:21 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:35 CodeSize:64 Lat:47 SizeLat:76 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512DQ-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI2-LABEL: 'splatvar_rotate_i8'
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512VBMI2-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; AVX512VBMI2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SLM-LABEL: 'splatvar_rotate_i8'
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; SLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; SLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; GLM-LABEL: 'splatvar_rotate_i8'
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; GLM-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; GLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; GLM-NEXT: Cost Model: Found costs of RThru:21 CodeSize:14 Lat:26 SizeLat:21 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; GLM-NEXT: Cost Model: Found costs of RThru:42 CodeSize:27 Lat:51 SizeLat:41 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; GLM-NEXT: Cost Model: Found costs of RThru:84 CodeSize:53 Lat:101 SizeLat:81 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'splatvar_rotate_i8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; XOP-NEXT: Cost Model: Found costs of 2 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOP-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:3 SizeLat:3 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; XOP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:7 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; XOP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:16 Lat:14 SizeLat:18 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512GFNI-LABEL: 'splatvar_rotate_i8'
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:14 SizeLat:9 for: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256)
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:12 Lat:6 SizeLat:14 for: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512)
diff --git a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
index 4cd8e5b..ecf54ce 100644
--- a/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/X86/load-broadcast.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512
-;
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX,AVX512
+;
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1
;
; vXf64
@@ -18,24 +18,24 @@
define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v2f64_v2f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v2f64_v2f64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
; SSE42-LABEL: 'broadcast_load_v2f64_v2f64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
; AVX-LABEL: 'broadcast_load_v2f64_v2f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
%load = load <2 x double>, ptr %src
%bcst = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
@@ -44,24 +44,24 @@ define <2 x double> @broadcast_load_v2f64_v2f64(ptr %src) {
define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v2f64_v4f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v2f64_v4f64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; SSE42-LABEL: 'broadcast_load_v2f64_v4f64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX-LABEL: 'broadcast_load_v2f64_v4f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
%load = load <2 x double>, ptr %src
%bcst = shufflevector <2 x double> %load, <2 x double> poison, <4 x i32> zeroinitializer
@@ -70,24 +70,24 @@ define <4 x double> @broadcast_load_v2f64_v4f64(ptr %src) {
define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v2f64_v8f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v2f64_v8f64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; SSE42-LABEL: 'broadcast_load_v2f64_v8f64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX-LABEL: 'broadcast_load_v2f64_v8f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x double>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
%load = load <2 x double>, ptr %src
%bcst = shufflevector <2 x double> %load, <2 x double> poison, <8 x i32> zeroinitializer
@@ -96,16 +96,16 @@ define <8 x double> @broadcast_load_v2f64_v8f64(ptr %src) {
define <2 x double> @broadcast_load_f64_v2f64(ptr %src) {
; SSE-LABEL: 'broadcast_load_f64_v2f64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <2 x double> poison, double %load, i32 0
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
; AVX-LABEL: 'broadcast_load_f64_v2f64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <2 x double> poison, double %load, i32 0
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <2 x double> poison, double %load, i32 0
+; AVX-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %bcst
;
%load = load double, ptr %src
%insert = insertelement <2 x double> poison, double %load, i32 0
@@ -115,28 +115,28 @@ define <2 x double> @broadcast_load_f64_v2f64(ptr %src) {
define <4 x double> @broadcast_load_f64_v4f64(ptr %src) {
; SSE-LABEL: 'broadcast_load_f64_v4f64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX1-LABEL: 'broadcast_load_f64_v4f64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX2-LABEL: 'broadcast_load_f64_v4f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
; AVX512-LABEL: 'broadcast_load_f64_v4f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <4 x double> poison, double %load, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <4 x double> poison, double %load, i32 0
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x double> %bcst
;
%load = load double, ptr %src
%insert = insertelement <4 x double> poison, double %load, i32 0
@@ -146,28 +146,28 @@ define <4 x double> @broadcast_load_f64_v4f64(ptr %src) {
define <8 x double> @broadcast_load_f64_v8f64(ptr %src) {
; SSE-LABEL: 'broadcast_load_f64_v8f64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; SSE-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX1-LABEL: 'broadcast_load_f64_v8f64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX1-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX2-LABEL: 'broadcast_load_f64_v8f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
; AVX512-LABEL: 'broadcast_load_f64_v8f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load double, ptr %src, align 8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert = insertelement <8 x double> poison, double %load, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x double> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load double, ptr %src, align 8
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %insert = insertelement <8 x double> poison, double %load, i32 0
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %bcst = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x double> %bcst
;
%load = load double, ptr %src
%insert = insertelement <8 x double> poison, double %load, i32 0
@@ -181,14 +181,14 @@ define <8 x double> @broadcast_load_f64_v8f64(ptr %src) {
define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4f32_v4f32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst
;
; AVX-LABEL: 'broadcast_load_v4f32_v4f32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %bcst
;
%load = load <4 x float>, ptr %src
%bcst = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
@@ -197,14 +197,14 @@ define <4 x float> @broadcast_load_v4f32_v4f32(ptr %src) {
define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4f32_v8f32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst
;
; AVX-LABEL: 'broadcast_load_v4f32_v8f32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %bcst
;
%load = load <4 x float>, ptr %src
%bcst = shufflevector <4 x float> %load, <4 x float> poison, <8 x i32> zeroinitializer
@@ -213,14 +213,14 @@ define <8 x float> @broadcast_load_v4f32_v8f32(ptr %src) {
define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4f32_v16f32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst
;
; AVX-LABEL: 'broadcast_load_v4f32_v16f32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x float>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %bcst
;
%load = load <4 x float>, ptr %src
%bcst = shufflevector <4 x float> %load, <4 x float> poison, <16 x i32> zeroinitializer
@@ -229,14 +229,14 @@ define <16 x float> @broadcast_load_v4f32_v16f32(ptr %src) {
define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) {
; SSE-LABEL: 'broadcast_load_v2i64_v2i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst
;
; AVX-LABEL: 'broadcast_load_v2i64_v2i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %bcst
;
%load = load <2 x i64>, ptr %src
%bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -249,14 +249,14 @@ define <2 x i64> @broadcast_load_v2i64_v2i64(ptr %src) {
define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) {
; SSE-LABEL: 'broadcast_load_v2i64_v4i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst
;
; AVX-LABEL: 'broadcast_load_v2i64_v4i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %bcst
;
%load = load <2 x i64>, ptr %src
%bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <4 x i32> zeroinitializer
@@ -265,14 +265,14 @@ define <4 x i64> @broadcast_load_v2i64_v4i64(ptr %src) {
define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) {
; SSE-LABEL: 'broadcast_load_v2i64_v8i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst
;
; AVX-LABEL: 'broadcast_load_v2i64_v8i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i64>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <2 x i64>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %bcst
;
%load = load <2 x i64>, ptr %src
%bcst = shufflevector <2 x i64> %load, <2 x i64> poison, <8 x i32> zeroinitializer
@@ -285,14 +285,14 @@ define <8 x i64> @broadcast_load_v2i64_v8i64(ptr %src) {
define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4i32_v4i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst
;
; AVX-LABEL: 'broadcast_load_v4i32_v4i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %bcst
;
%load = load <4 x i32>, ptr %src
%bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -301,14 +301,14 @@ define <4 x i32> @broadcast_load_v4i32_v4i32(ptr %src) {
define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4i32_v8i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst
;
; AVX-LABEL: 'broadcast_load_v4i32_v8i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %bcst
;
%load = load <4 x i32>, ptr %src
%bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <8 x i32> zeroinitializer
@@ -317,14 +317,14 @@ define <8 x i32> @broadcast_load_v4i32_v8i32(ptr %src) {
define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) {
; SSE-LABEL: 'broadcast_load_v4i32_v16i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; SSE-NEXT: Cost Model: Found costs of 1 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst
;
; AVX-LABEL: 'broadcast_load_v4i32_v16i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, ptr %src, align 16
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %bcst
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <4 x i32>, ptr %src, align 16
+; AVX-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %bcst
;
%load = load <4 x i32>, ptr %src
%bcst = shufflevector <4 x i32> %load, <4 x i32> poison, <16 x i32> zeroinitializer
@@ -337,34 +337,34 @@ define <16 x i32> @broadcast_load_v4i32_v16i32(ptr %src) {
define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v8i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v8i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v8i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v8i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
; AVX512-LABEL: 'broadcast_load_v8i16_v8i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %bcst
;
%load = load <8 x i16>, ptr %src
%bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -373,34 +373,34 @@ define <8 x i16> @broadcast_load_v8i16_v8i16(ptr %src) {
define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v16i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v16i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v16i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v16i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v16i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
; AVX512-LABEL: 'broadcast_load_v8i16_v16i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %bcst
;
%load = load <8 x i16>, ptr %src
%bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <16 x i32> zeroinitializer
@@ -409,34 +409,34 @@ define <16 x i16> @broadcast_load_v8i16_v16i16(ptr %src) {
define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v8i16_v32i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v8i16_v32i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; SSE42-LABEL: 'broadcast_load_v8i16_v32i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; AVX1-LABEL: 'broadcast_load_v8i16_v32i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; AVX2-LABEL: 'broadcast_load_v8i16_v32i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
; AVX512-LABEL: 'broadcast_load_v8i16_v32i16'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <8 x i16>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %bcst
;
%load = load <8 x i16>, ptr %src
%bcst = shufflevector <8 x i16> %load, <8 x i16> poison, <32 x i32> zeroinitializer
@@ -449,34 +449,34 @@ define <32 x i16> @broadcast_load_v8i16_v32i16(ptr %src) {
define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v16i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v16i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v16i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v16i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v16i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
; AVX512-LABEL: 'broadcast_load_v16i8_v16i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %bcst
;
%load = load <16 x i8>, ptr %src
%bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -485,34 +485,34 @@ define <16 x i8> @broadcast_load_v16i8_v16i8(ptr %src) {
define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v32i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v32i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v32i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v32i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v32i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
; AVX512-LABEL: 'broadcast_load_v16i8_v32i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %bcst
;
%load = load <16 x i8>, ptr %src
%bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <32 x i32> zeroinitializer
@@ -521,34 +521,34 @@ define <32 x i8> @broadcast_load_v16i8_v32i8(ptr %src) {
define <64 x i8> @broadcast_load_v16i8_v64i8(ptr %src) {
; SSE2-LABEL: 'broadcast_load_v16i8_v64i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; SSSE3-LABEL: 'broadcast_load_v16i8_v64i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; SSE42-LABEL: 'broadcast_load_v16i8_v64i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; AVX1-LABEL: 'broadcast_load_v16i8_v64i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; AVX2-LABEL: 'broadcast_load_v16i8_v64i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX2-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
; AVX512-LABEL: 'broadcast_load_v16i8_v64i8'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, ptr %src, align 16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %bcst
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load <16 x i8>, ptr %src, align 16
+; AVX512-NEXT: Cost Model: Found costs of 0 for: %bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %bcst
;
%load = load <16 x i8>, ptr %src
%bcst = shufflevector <16 x i8> %load, <16 x i8> poison, <64 x i32> zeroinitializer
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 3031629..aea7cc8 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX1-LABEL: 'test_scatter_16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX512-LABEL: 'test_scatter_16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 8fae37b3..742b5b2 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -2032,7 +2032,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2040,7 +2040,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2048,7 +2048,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2056,7 +2056,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef)
@@ -2091,7 +2091,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX1-LABEL: 'test_scatter_16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2099,7 +2099,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2107,7 +2107,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
@@ -2115,7 +2115,7 @@ define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32
;
; AVX512-LABEL: 'test_scatter_16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1>
; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
index a8ceebf..c4cf8b7 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16.ll
@@ -3,11 +3,11 @@
define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) {
; CHECK-LABEL: 'test_vXf16'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
index 1a2d098..79e1622 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast.ll
@@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer
@@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer
@@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -154,41 +154,41 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128
; SSE2-LABEL: 'test_vXf16'
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXf16'
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXf16'
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf16'
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer
@@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer
@@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi8'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer
@@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1>
; SSE2-LABEL: 'test_vXi1'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi1'
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi1'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi1'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi1'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi1'
@@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
; AVX1-LABEL: 'test_upper_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_upper_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_upper_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
index ccfb543..8bb9cbc 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -32,21 +32,21 @@ define void @shuffle_load() {
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -66,11 +66,11 @@ define void @shuffle_load() {
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -95,21 +95,21 @@ define void @shuffle_load() {
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -129,11 +129,11 @@ define void @shuffle_load() {
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -158,21 +158,21 @@ define void @shuffle_load() {
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -192,11 +192,11 @@ define void @shuffle_load() {
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; SSE3-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; SSE3-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; SSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -221,21 +221,21 @@ define void @shuffle_load() {
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -255,11 +255,11 @@ define void @shuffle_load() {
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -284,21 +284,21 @@ define void @shuffle_load() {
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -318,11 +318,11 @@ define void @shuffle_load() {
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX2-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
@@ -347,21 +347,21 @@ define void @shuffle_load() {
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi8 = load <8 x i8>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi8 = load <16 x i8>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi8 = load <32 x i8>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_64xi8 = load <64 x i8>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 3 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi16 = load <2 x i16>, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi16 = load <4 x i16>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xi16 = load <8 x i16>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xi16 = load <16 x i16>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xi16 = load <32 x i16>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xi32 = load <2 x i32>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xi32 = load <4 x i32>, ptr undef, align 16
@@ -381,11 +381,11 @@ define void @shuffle_load() {
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf16 = load <4 x half>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 0 for: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_8xf16 = load <8 x half>, ptr undef, align 16
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %ld_16xf16 = load <16 x half>, ptr undef, align 32
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %ld_32xf16 = load <32 x half>, ptr undef, align 64
-; AVX512-NEXT: Cost Model: Found costs of 2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_2xf32 = load <2 x float>, ptr undef, align 8
; AVX512-NEXT: Cost Model: Found costs of 1 for: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %ld_4xf32 = load <4 x float>, ptr undef, align 16
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll
index 4affc7e..f47e1a3 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll
@@ -3,11 +3,11 @@
define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) {
; CHECK-LABEL: 'test_vXf16'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll
index 1bcf2d5..aca0ba3 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll
@@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:5 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:12 SizeLat:4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:6 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:5 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
@@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 0>
@@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
@@ -154,65 +154,65 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:6 SizeLat:6 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:12 SizeLat:12 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:5 Lat:9 SizeLat:5 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:10 Lat:18 SizeLat:10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:18 SizeLat:8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi16'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
@@ -227,71 +227,71 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-LABEL: 'test_vXi8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 9 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 9 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 18 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 36 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:11 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:11 Lat:6 SizeLat:11 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:22 Lat:12 SizeLat:22 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:44 Lat:24 SizeLat:44 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:5 Lat:9 SizeLat:5 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:10 Lat:18 SizeLat:10 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:18 SizeLat:8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:9 SizeLat:3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi8'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
@@ -320,22 +320,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
; AVX1-LABEL: 'test_upper_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_upper_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:14 SizeLat:8 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_upper_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:7 SizeLat:4 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll
index 007aa59bc..21d5766 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16.ll
@@ -3,15 +3,15 @@
define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) {
; CHECK-LABEL: 'test_vXf16'
-; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
- %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+ %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+ %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
ret void
}
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
index 3727d7b..11f14df 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll
@@ -21,42 +21,42 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
; SSE-LABEL: 'test_vXf64'
; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXf64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
%V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
- %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
+ %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 0, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 13, i32 3, i32 11, i32 1, i32 0>
ret void
}
@@ -64,79 +64,93 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
; SSE-LABEL: 'test_vXi64'
; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 5 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
%V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
- %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 5, i32 1, i32 0>
ret void
}
define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
-; SSE-LABEL: 'test_vXf32'
-; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
-; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; SSE2-LABEL: 'test_vXf32'
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; SSE2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; SSSE3-LABEL: 'test_vXf32'
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; SSE42-LABEL: 'test_vXf32'
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXf32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
%V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
%V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
ret void
}
@@ -144,48 +158,48 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; SSE-LABEL: 'test_vXi32'
; SSE-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSE-NEXT: Cost Model: Found costs of 18 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi32'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:2 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
%V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
- %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 5, i32 3, i32 0, i32 1, i32 0>
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+ %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret void
}
@@ -193,90 +207,90 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:24 CodeSize:26 Lat:26 SizeLat:26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:5 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSE2-NEXT: Cost Model: Found costs of RThru:42 CodeSize:56 Lat:56 SizeLat:56 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:72 CodeSize:94 Lat:94 SizeLat:94 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 39 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 35 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi16'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 63 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 16 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 99 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 10 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 44 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi16'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi16'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
%V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>
- %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 0, i32 1, i32 0>
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 4, i32 15, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 3, i32 6, i32 5, i32 14, i32 7, i32 2, i32 1, i32 16>
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
ret void
}
@@ -286,88 +300,88 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 5 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:19 Lat:19 SizeLat:19 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:37 Lat:37 SizeLat:37 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:91 Lat:91 SizeLat:91 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi8'
-; XOP-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; XOP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 12 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 8 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi8'
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi8'
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi8'
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
%V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
%V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 7, i32 7, i32 5, i32 5, i32 3, i32 3, i32 1, i32 1>
%V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 8, i32 30, i32 20, i32 28, i32 27, i32 15, i32 13, i32 24, i32 11, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 26, i32 15, i32 14, i32 25, i32 12, i32 11, i32 23, i32 9, i32 31, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 0, i32 62, i32 61, i32 60, i32 7, i32 58, i32 57, i32 55, i32 56, i32 54, i32 53, i32 52, i32 1, i32 50, i32 47, i32 48, i32 49, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 11, i32 37, i32 36, i32 33, i32 34, i32 35, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 23, i32 24, i32 20, i32 35, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 42, i32 12, i32 34, i32 10, i32 8, i32 9, i32 24, i32 6, i32 5, i32 4, i32 29, i32 2, i32 1, i32 0>
ret void
}
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
index 6d22546..c98efcd 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll
@@ -25,20 +25,20 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -56,20 +56,20 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
@@ -89,22 +89,22 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> <i32 1, i32 1>
@@ -125,22 +125,22 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -154,41 +154,41 @@ define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128
; SSE2-LABEL: 'test_vXf16'
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXf16'
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXf16'
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf16'
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 0 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 1>
@@ -244,49 +244,49 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-LABEL: 'test_vXi16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi16'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
@@ -302,54 +302,54 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi8'
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
@@ -365,46 +365,46 @@ define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1>
; SSE2-LABEL: 'test_vXi1'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSE2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi1'
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSSE3-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSSE3-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi1'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; SSE42-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi1'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi1'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi1'
@@ -440,22 +440,22 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a
; AVX1-LABEL: 'test_upper_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_upper_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_upper_vXf32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 3>
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
index 56afc01..3016316 100644
--- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll
@@ -22,41 +22,41 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXf64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXf64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf64'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> <i32 3, i32 0>
%V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
%V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
- %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
ret void
}
@@ -65,41 +65,41 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512,
; SSE-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
; SSE-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; SSE-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi64'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
; XOP-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; XOP-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; XOP-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX1-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX1-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX2-NEXT: Cost Model: Found costs of 11 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 16 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi64'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
-; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> <i32 3, i32 0>
%V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
%V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
- %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 28, i32 13, i32 10, i32 18, i32 8, i32 8, i32 24, i32 31, i32 4, i32 9, i32 2, i32 8, i32 0>
ret void
}
@@ -107,129 +107,113 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
; SSE2-LABEL: 'test_vXf32'
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXf32'
; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 22 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXf32'
; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 7 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 9 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXf32'
; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXf32'
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXf32'
; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXf32'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> <i32 3, i32 0>
%V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
- %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
- %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+ %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
ret void
}
define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) {
-; SSE2-LABEL: 'test_vXi32'
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSE2-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-; SSSE3-LABEL: 'test_vXi32'
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 14 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-; SSE42-LABEL: 'test_vXi32'
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; SSE-LABEL: 'test_vXi32'
+; SSE-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
+; SSE-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSE-NEXT: Cost Model: Found costs of 23 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
+; SSE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi32'
; XOP-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
; XOP-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi32'
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 17 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 27 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi32'
; AVX2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 13 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 3 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 8 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 21 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512-LABEL: 'test_vXi32'
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:9 SizeLat:3 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:12 SizeLat:4 for: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> <i32 3, i32 0>
%V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
- %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
- %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 11, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
+ %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+ %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 20, i32 53, i32 22, i32 20, i32 19, i32 18, i32 40, i32 16, i32 15, i32 48, i32 13, i32 40, i32 11, i32 11, i32 9, i32 45, i32 33, i32 11, i32 5, i32 4, i32 35, i32 2, i32 33, i32 0>
ret void
}
@@ -238,89 +222,89 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; SSE2-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSE2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:11 Lat:11 SizeLat:11 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:27 Lat:27 SizeLat:27 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:36 Lat:36 SizeLat:36 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:24 Lat:24 SizeLat:24 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:51 Lat:51 SizeLat:51 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:69 CodeSize:91 Lat:91 SizeLat:91 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi16'
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 21 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 37 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi16'
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 10 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 12 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 33 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi16'
; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 26 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 54 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi16'
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 30 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 90 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi16'
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 14 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 20 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 17 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 42 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi16'
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 84 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 168 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi16'
; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi16'
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 4 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 8 for: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> <i32 3, i32 0>
%V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
%V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
- %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 29, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 22, i32 2, i32 1, i32 0>
+ %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+ %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 41, i32 40, i32 39, i32 39, i32 42, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 0, i32 1, i32 5, i32 6, i32 2, i32 66, i32 2, i32 6, i32 7>
ret void
}
@@ -330,8 +314,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE2-NEXT: Cost Model: Found costs of 4 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSE2-NEXT: Cost Model: Found costs of 7 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:13 Lat:13 SizeLat:13 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:41 Lat:41 SizeLat:41 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:42 Lat:42 SizeLat:42 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSE2-NEXT: Cost Model: Found costs of RThru:66 CodeSize:78 Lat:78 SizeLat:78 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSSE3-LABEL: 'test_vXi8'
@@ -339,8 +323,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSSE3-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 12 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSSE3-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; SSSE3-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; SSE42-LABEL: 'test_vXi8'
@@ -348,8 +332,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 4 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; SSE42-NEXT: Cost Model: Found costs of 6 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 10 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; SSE42-NEXT: Cost Model: Found costs of 18 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; XOP-LABEL: 'test_vXi8'
@@ -357,8 +341,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; XOP-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; XOP-NEXT: Cost Model: Found costs of 13 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 9 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; XOP-NEXT: Cost Model: Found costs of 36 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; XOP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX1-LABEL: 'test_vXi8'
@@ -366,8 +350,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX1-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 15 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX1-NEXT: Cost Model: Found costs of 54 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX2-LABEL: 'test_vXi8'
@@ -375,8 +359,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: Cost Model: Found costs of 11 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX2-NEXT: Cost Model: Found costs of 23 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512F-LABEL: 'test_vXi8'
@@ -384,8 +368,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512F-NEXT: Cost Model: Found costs of 42 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512BW-LABEL: 'test_vXi8'
@@ -393,8 +377,8 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of 3 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 7 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512BW-NEXT: Cost Model: Found costs of 19 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; AVX512VBMI-LABEL: 'test_vXi8'
@@ -402,15 +386,15 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+; AVX512VBMI-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
; AVX512VBMI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> <i32 3, i32 0>
%V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> <i32 3, i32 6, i32 1, i32 5>
%V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> <i32 7, i32 6, i32 6, i32 8, i32 9, i32 2, i32 1, i32 0>
%V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> <i32 29, i32 14, i32 28, i32 12, i32 11, i32 10, i32 11, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
- %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 33, i32 20, i32 27, i32 28, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 7, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 40, i32 55, i32 11, i32 9, i32 45, i32 4, i32 11, i32 4, i32 5, i32 35, i32 2, i32 33, i32 0>
+ %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 124, i32 62, i32 71, i32 127, i32 58, i32 60, i32 55, i32 56, i32 57, i32 54, i32 127, i32 52, i32 50, i32 51, i32 49, i32 48, i32 47, i32 45, i32 44, i32 44, i32 100, i32 42, i32 41, i32 39, i32 39, i32 40, i32 37, i32 38, i32 35, i32 34, i32 32, i32 33, i32 31, i32 27, i32 30, i32 28, i32 29, i32 26, i32 25, i32 24, i32 23, i32 99, i32 21, i32 20, i32 19, i32 18, i32 17, i32 72, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 5, i32 6, i32 4, i32 66, i32 2, i32 1, i32 0>
ret void
}
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index bd642791..c53da90 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index fdf011e..7a06269 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -531,31 +531,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:7 SizeLat:13 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:6 SizeLat:9 for: %shift = ashr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -574,31 +574,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:20 Lat:14 SizeLat:26 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:12 SizeLat:18 for: %shift = ashr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = ashr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -660,31 +660,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -703,31 +703,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -740,43 +740,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = ashr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -789,43 +789,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = ashr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -838,67 +838,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = ashr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = ashr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = ashr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = ashr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -911,67 +911,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:15 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:9 Lat:5 SizeLat:13 for: %shift = ashr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:8 Lat:12 SizeLat:12 for: %shift = ashr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -984,67 +984,67 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:18 Lat:30 SizeLat:26 for: %shift = ashr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:10 CodeSize:16 Lat:11 SizeLat:21 for: %shift = ashr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:11 Lat:9 SizeLat:16 for: %shift = ashr <32 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:13 for: %shift = ashr <32 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1057,67 +1057,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:36 Lat:60 SizeLat:52 for: %shift = ashr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:32 Lat:22 SizeLat:42 for: %shift = ashr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:22 Lat:18 SizeLat:32 for: %shift = ashr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:37 CodeSize:51 Lat:37 SizeLat:63 for: %shift = ashr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:5 CodeSize:10 Lat:10 SizeLat:15 for: %shift = ashr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 2f70a4b..ee7546f 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -699,31 +699,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 78fd9f33..a508c2a 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -527,31 +527,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -570,31 +570,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = lshr <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = lshr <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -656,31 +656,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -699,31 +699,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -736,43 +736,43 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = lshr <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -785,43 +785,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = lshr <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -834,67 +834,67 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = lshr <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = lshr <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = lshr <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = lshr <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -907,67 +907,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:5 Lat:13 SizeLat:9 for: %shift = lshr <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:8 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v16i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v16i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v16i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:9 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v16i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v16i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:10 SizeLat:8 for: %shift = lshr <16 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -980,43 +980,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:10 Lat:26 SizeLat:18 for: %shift = lshr <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:10 Lat:9 SizeLat:14 for: %shift = lshr <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:7 SizeLat:6 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:9 for: %shift = lshr <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1029,67 +1029,67 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:52 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:20 Lat:18 SizeLat:28 for: %shift = lshr <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:14 SizeLat:12 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:14 Lat:16 SizeLat:18 for: %shift = lshr <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:30 Lat:19 SizeLat:36 for: %shift = lshr <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:7 Lat:8 SizeLat:10 for: %shift = lshr <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index adedca8..375dac5 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> poison, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> poison, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v8i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> poison, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v64i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> poison, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index a3536ec..b2435be 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -567,31 +567,31 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v4i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v4i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:7 SizeLat:6 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v4i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <4 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %shift
;
@@ -610,31 +610,31 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i64'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i64'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:14 SizeLat:12 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i64'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:8 SizeLat:4 for: %shift = shl <8 x i64> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i64'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:2 for: %shift = shl <8 x i64> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i64> %shift
;
@@ -696,31 +696,31 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <8 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %shift
;
@@ -739,31 +739,31 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i32'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i32'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i32'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <16 x i32> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i32'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i32> %shift
;
@@ -776,49 +776,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v8i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v8i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v8i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v8i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v8i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v8i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <8 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %shift = shl <8 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %shift
;
@@ -831,49 +831,49 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:7 SizeLat:7 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i16'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <16 x i16> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %shift = shl <16 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %shift
;
@@ -886,73 +886,73 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) {
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i16'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:2 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i16'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i16'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i16'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i16'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:10 Lat:14 SizeLat:14 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i16'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:8 SizeLat:6 for: %shift = shl <32 x i16> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v32i16'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v32i16'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v32i16'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:10 SizeLat:7 for: %shift = shl <32 x i16> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v32i16'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i16'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %shift = shl <32 x i16> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v32i16'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i16> undef, i16 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:3 for: %shift = shl <32 x i16> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i16> %shift
;
@@ -965,49 +965,49 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:8 for: %shift = shl <16 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v16i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v16i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v16i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v16i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:5 Lat:5 SizeLat:7 for: %shift = shl <16 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v16i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:10 SizeLat:9 for: %shift = shl <16 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %shift
;
@@ -1020,49 +1020,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v32i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v32i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:11 Lat:8 SizeLat:14 for: %shift = shl <32 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v32i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v32i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:7 SizeLat:6 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v32i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; AVX512-LABEL: 'splatvar_shift_v32i8'
; AVX512-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; AVX512-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <32 x i8> %a, %splat
; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v32i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:12 Lat:20 SizeLat:18 for: %shift = shl <32 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %shift
;
@@ -1075,73 +1075,73 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE2-NEXT: Cost Model: Found costs of 3 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:4 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SSE42-LABEL: 'splatvar_shift_v64i8'
; SSE42-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SSE42-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX1-LABEL: 'splatvar_shift_v64i8'
; AVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:22 Lat:16 SizeLat:28 for: %shift = shl <64 x i8> %a, %splat
; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX2-LABEL: 'splatvar_shift_v64i8'
; AVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX1-LABEL: 'splatvar_shift_v64i8'
; XOPAVX1-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX1-NEXT: Cost Model: Found costs of 2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:6 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:14 SizeLat:12 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; XOPAVX2-LABEL: 'splatvar_shift_v64i8'
; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; XOPAVX2-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; XOPAVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:14 SizeLat:16 for: %shift = shl <64 x i8> %a, %splat
; XOPAVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512F-LABEL: 'splatvar_shift_v64i8'
; AVX512F-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512F-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512F-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512F-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512F-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BW-LABEL: 'splatvar_shift_v64i8'
; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BW-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BW-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BW-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BW-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512VL-LABEL: 'splatvar_shift_v64i8'
; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512VL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512VL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512VL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:27 Lat:19 SizeLat:33 for: %shift = shl <64 x i8> %a, %splat
; AVX512VL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512BWVL-LABEL: 'splatvar_shift_v64i8'
; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512BWVL-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512BWVL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; SLM-LABEL: 'splatvar_shift_v64i8'
; SLM-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; SLM-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; SLM-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:3 SizeLat:2 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:24 Lat:40 SizeLat:36 for: %shift = shl <64 x i8> %a, %splat
; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
; AVX512GFNI-LABEL: 'splatvar_shift_v64i8'
; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
-; AVX512GFNI-NEXT: Cost Model: Found costs of 1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
+; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:7 SizeLat:8 for: %shift = shl <64 x i8> %a, %splat
; AVX512GFNI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <64 x i8> %shift
;
diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
index d330152..e0def90 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -113,7 +113,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
; NORMALIZE-LABEL: 'banerjee1'
; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
@@ -127,7 +127,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
; NORMALIZE-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
; NORMALIZE-NEXT: da analyze - confused!
; NORMALIZE-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
-; NORMALIZE-NEXT: da analyze - output [* *]!
+; NORMALIZE-NEXT: da analyze - confused!
;
; DELIN-LABEL: 'banerjee1'
; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
@@ -141,7 +141,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
; DELIN-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
; DELIN-NEXT: da analyze - confused!
; DELIN-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8
-; DELIN-NEXT: da analyze - output [* *]!
+; DELIN-NEXT: da analyze - confused!
;
entry:
%cmp4 = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll
index 3e3426a..bf2268b 100644
--- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll
@@ -8,11 +8,11 @@
define float @bug41488_test1(float %f) {
; CHECK-LABEL: 'bug41488_test1'
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4
-; CHECK-NEXT: da analyze - input [*]!
+; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4
-; CHECK-NEXT: da analyze - output [*]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%g = alloca float, align 4
@@ -34,11 +34,11 @@ for.cond.cleanup:
define void @bug41488_test2(i32 %n) {
; CHECK-LABEL: 'bug41488_test2'
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4
-; CHECK-NEXT: da analyze - input [*]!
+; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store float 0.000000e+00, ptr %q, align 4 --> Dst: store float 0.000000e+00, ptr %q, align 4
-; CHECK-NEXT: da analyze - output [*]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%g = alloca float, align 4
@@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl
; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8
-; CHECK-NEXT: da analyze - output [*]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%cmp8 = icmp sgt i32 %n, 1
@@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader,
define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) {
; CHECK-LABEL: 'bug53942_bar'
; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8
-; CHECK-NEXT: da analyze - input [*]!
+; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8
-; CHECK-NEXT: da analyze - output [*]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
br label %for.cond
@@ -166,14 +166,14 @@ for.end: ; preds = %for.cond.cleanup
; (j % 2 == 0 ? A[i][j] : A[i][j+1]) = 1;
; }
;
-; FIXME: There are loop-carried dependencies between the store instruction. For
+; There are loop-carried dependencies between the store instruction. For
; example, the value of %ptr0 when (i, j) = (0, 1) is %A+8, which is the same
; as when (i, j) = (0, 2).
define void @non_invariant_baseptr_with_identical_obj(ptr %A) {
; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj'
; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - confused!
;
entry:
br label %loop.i.header
@@ -216,13 +216,13 @@ exit:
; Similar to the above case, but ptr0 is loop-invariant with respsect to the
; k-loop.
;
-; FIXME: Same as the above case, there are loop-carried dependencies between
-; the store.
+; Same as the above case, there are loop-carried dependencies between the
+; store.
define void @non_invariant_baseptr_with_identical_obj2(ptr %A) {
; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj2'
; CHECK-NEXT: Src: store i32 1, ptr %idx, align 4 --> Dst: store i32 1, ptr %idx, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - confused!
;
entry:
br label %loop.i.header
@@ -259,3 +259,58 @@ loop.i.latch:
exit:
ret void
}
+
+; Pseudo-code that is approximately semantically equivalent to the below IR:
+;
+; void f(int A[][32]) {
+; for (int i = 0; i < 100; i++)
+; for (int j = 0; j < 15; j++) {
+; int offset = (j % 2 == 0) ? 1 : 0;
+; A[i][2 * j + offset + 0] = 1;
+; A[i][2 * j + offset + 1] = 1;
+; }
+; }
+;
+; There are loop-carried dependencies between the two stores. For example,
+; A[0][2] is accessed from both the former one when (i, j) = (0, 1) and the
+; latter one when (i, j) = (0, 0).
+;
+define void @non_invariant_baseptr_with_identical_obj3(ptr %A) {
+; CHECK-LABEL: 'non_invariant_baseptr_with_identical_obj3'
+; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
+; CHECK-NEXT: da analyze - confused!
+; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
+; CHECK-NEXT: da analyze - confused!
+;
+entry:
+ br label %loop.i.header
+
+loop.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
+ %A1 = getelementptr i32, ptr %A, i32 1
+ br label %loop.j
+
+loop.j:
+ %j = phi i32 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
+ %ptr0 = phi ptr [ %A1, %loop.i.header ], [ %ptr1, %loop.j ]
+ %ptr1 = phi ptr [ %A, %loop.i.header ], [ %ptr0, %loop.j ]
+ %j2_0 = shl i32 %j, 1
+ %j2_1 = add i32 %j2_0, 1
+ %idx0 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_0
+ %idx1 = getelementptr [32 x i32], ptr %ptr0, i32 %i, i32 %j2_1
+ store i32 1, ptr %idx0
+ store i32 1, ptr %idx1
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 15
+ br i1 %cmp.j, label %loop.j, label %loop.i.latch
+
+loop.i.latch:
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 100
+ br i1 %cmp.i, label %loop.i.header, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index c0e1362..03343e7 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -398,7 +398,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 %2, ptr %B.addr.12, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%cmp4 = icmp sgt i64 %n, 0
@@ -475,7 +475,7 @@ define void @gcd7(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%0 = zext i32 %n to i64
@@ -566,7 +566,7 @@ define void @gcd8(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %5 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 %5, ptr %B.addr.12, align 4 --> Dst: store i32 %5, ptr %B.addr.12, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%cmp4 = icmp sgt i32 %n, 0
@@ -650,7 +650,7 @@ define void @gcd9(i32 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %11 = load i32, ptr %arrayidx12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 %11, ptr %B.addr.12, align 4 --> Dst: store i32 %11, ptr %B.addr.12, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%0 = zext i32 %n to i64
diff --git a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll
index d983bd4..3e110ac 100644
--- a/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/NonAffineExpr.ll
@@ -12,7 +12,7 @@ define void @f(ptr %a, i32 %n, i1 %arg) align 2 {
; CHECK-NEXT: Src: %t.2 = load ptr, ptr %a, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %t.4 = load i32, ptr %t.3, align 4 --> Dst: %t.4 = load i32, ptr %t.3, align 4
-; CHECK-NEXT: da analyze - input [* *]!
+; CHECK-NEXT: da analyze - confused!
;
for.preheader:
%t.0 = ashr exact i32 %n, 3
diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
index 4ab8777..8cb0e2a 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -69,7 +69,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
-; CHECK-NEXT: da analyze - output [* * *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%cmp10 = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
index 4040187..e67cae7d 100644
--- a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@@ -28,7 +28,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; CHECK-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
-; CHECK-NEXT: da analyze - output [* * *]!
+; CHECK-NEXT: da analyze - confused!
;
; LIN-LABEL: 'p2'
; LIN-NEXT: Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8
@@ -42,7 +42,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
; LIN-NEXT: Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
; LIN-NEXT: da analyze - confused!
; LIN-NEXT: Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
-; LIN-NEXT: da analyze - output [* * *]!
+; LIN-NEXT: da analyze - confused!
;
entry:
%cmp10 = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index f64a748..8b9aa25 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -437,7 +437,7 @@ define void @symbolicrdiv6(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ss
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.12, align 4 --> Dst: store i32 %0, ptr %B.addr.12, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - confused!
;
entry:
%cmp4 = icmp eq i64 %n1, 0
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll
new file mode 100644
index 0000000..30c8088
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-check-known-true.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+; TODO: Accesses are known completely before or after.
+define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr %start.2, ptr %end) {
+; CHECK-LABEL: 'test_runtime_check_known_false_after_construction'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %ptr.iv.1 = phi ptr [ %ptr.iv.1.next, %loop ], [ %start.1, %entry ]
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %ptr.iv.2 = phi ptr [ %ptr.iv.2.next, %loop ], [ %start.2.diff, %entry ]
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: ((-8 * ((2305843009213693951 * (8 + (-1 * (ptrtoint ptr %start.1 to i64)) + (ptrtoint ptr %end to i64))) /u 8)) + %start.1) High: (8 + %start.1))
+; CHECK-NEXT: Member: {%start.1,+,-8}<%loop>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: (-8 + (-8 * ((2305843009213693951 * (8 + (-1 * (ptrtoint ptr %start.1 to i64)) + (ptrtoint ptr %end to i64))) /u 8)) + (-1 * (ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2) High: ((-1 * (ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2))
+; CHECK-NEXT: Member: {(-8 + (-1 * (ptrtoint ptr %start.2 to i64)) + (ptrtoint ptr %start.1 to i64) + %start.2),+,-8}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: Equal predicate: (zext i3 ((trunc i64 (ptrtoint ptr %end to i64) to i3) + (-1 * (trunc i64 (ptrtoint ptr %start.1 to i64) to i3))) to i64) == 0
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %gep.start.2 = getelementptr i8, ptr %start.2, i64 8
+ %start.1.int = ptrtoint ptr %start.1 to i64
+ %start.2.int = ptrtoint ptr %gep.start.2 to i64
+ %diff = sub i64 %start.1.int, %start.2.int
+ %start.2.diff = getelementptr i8, ptr %start.2, i64 %diff
+ br label %loop
+
+loop:
+ %ptr.iv.1 = phi ptr [ %ptr.iv.1.next, %loop ], [ %start.1, %entry ]
+ %ptr.iv.2 = phi ptr [ %ptr.iv.2.next, %loop ], [ %start.2.diff, %entry ]
+ %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i64 -8
+ %ptr.iv.1.next = getelementptr i8, ptr %ptr.iv.1, i64 -8
+ %l = load i64, ptr %ptr.iv.2, align 8
+ store i64 %l, ptr %ptr.iv.1, align 8
+ %ec = icmp eq ptr %ptr.iv.2, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}