diff options
Diffstat (limited to 'llvm/test/Analysis/CostModel')
83 files changed, 8894 insertions, 6777 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index c208d03..f4721f1 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -478,7 +478,7 @@ define void @frem() { ; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F128 = frem fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F128 = frem <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %F32 = frem float undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll index 3fbd12a..5bbb80f 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll @@ -278,21 +278,21 @@ declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x define i32 @smul(i32 %arg) { ; CHECK-LABEL: 'smul' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:8 Lat:8 SizeLat:8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:9 Lat:9 SizeLat:9 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:136 CodeSize:11 Lat:11 SizeLat:11 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:20 Lat:28 SizeLat:28 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:37 Lat:53 SizeLat:53 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:136 CodeSize:71 Lat:103 SizeLat:103 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:8 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:76 CodeSize:9 Lat:9 SizeLat:9 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:11 Lat:11 SizeLat:11 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:10 Lat:10 SizeLat:10 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:76 CodeSize:17 Lat:17 SizeLat:17 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:31 Lat:31 SizeLat:31 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:17 Lat:17 SizeLat:17 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:31 SizeLat:31 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:17 Lat:17 SizeLat:17 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:31 Lat:31 SizeLat:31 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -341,21 +341,21 @@ declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x define i32 @umul(i32 %arg) { ; CHECK-LABEL: 'umul' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:33 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:132 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:33 CodeSize:19 Lat:27 SizeLat:27 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:36 Lat:52 SizeLat:52 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:132 CodeSize:70 Lat:102 SizeLat:102 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:37 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:148 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:37 CodeSize:9 Lat:9 SizeLat:9 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:16 Lat:16 SizeLat:16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:148 CodeSize:30 Lat:30 SizeLat:30 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:16 Lat:16 SizeLat:16 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:30 SizeLat:30 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:16 Lat:16 SizeLat:16 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:30 SizeLat:30 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll index 7e1588f..48710d8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll @@ -303,15 +303,15 @@ define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_32 = add <4 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <4 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <4 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %asw_16_32 = add <4 x i32> %i32, %sw_16_32 @@ -323,16 +323,16 @@ define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_16_32 = add <4 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <4 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <4 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <4 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64> @@ -432,26 +432,26 @@ define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_16 = add <8 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = add <8 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = add <8 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <8 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <8 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = add <8 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32> @@ -462,16 +462,16 @@ define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <8 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <8 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <8 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64> @@ -571,26 +571,26 @@ define void @extaddv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = add <16 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = add <16 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = add <16 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = add <16 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = add <16 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32> @@ -601,16 +601,16 @@ define void @extaddv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = add <16 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 12 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = add <16 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = add <16 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64> @@ -998,15 +998,15 @@ define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_32 = sub <4 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %asw_16_32 = sub <4 x i32> %i32, %sw_16_32 @@ -1018,16 +1018,16 @@ define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_16_32 = sub <4 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64> @@ -1127,26 +1127,26 @@ define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_16 = sub <8 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32> @@ -1157,16 +1157,16 @@ define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64> @@ -1266,26 +1266,26 @@ define void @extsubv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32> @@ -1296,16 +1296,16 @@ define void @extsubv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 12 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64> @@ -1693,15 +1693,15 @@ define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_32 = mul <4 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32 @@ -1713,22 +1713,22 @@ define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64> @@ -1822,52 +1822,52 @@ define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 7 for: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 4 for: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64> @@ -1951,62 +1951,62 @@ define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64) { ; CHECK-LABEL: 'extmulv16' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 15 for: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64 +; CHECK-NEXT: Cost Model: Found costs of 4 for: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 12 for: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64 +; CHECK-NEXT: Cost Model: Found costs of 8 for: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64 -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:1 Lat:1 SizeLat:1 for: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64> diff --git a/llvm/test/Analysis/CostModel/AArch64/arith.ll b/llvm/test/Analysis/CostModel/AArch64/arith.ll index d3c1e07..dc7a3c4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith.ll @@ -128,6 +128,31 @@ define void @i64() { ret void } +define void @i128() { +; CHECK-LABEL: 'i128' +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c = add i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %d = sub i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %e = mul i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %f = ashr i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %g = lshr i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %h = shl i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %i = and i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %j = or i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %k = xor i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %c = add i128 undef, undef + %d = sub i128 undef, undef + %e = mul i128 undef, undef + %f = ashr i128 undef, undef + %g = lshr i128 undef, undef + %h = shl i128 undef, undef + %i = and i128 undef, undef + %j = or i128 undef, undef + %k = xor i128 undef, undef + ret void +} + define void @vi8() { ; CHECK-LABEL: 'vi8' @@ -444,3 +469,28 @@ define void @vi64() { %k16 = xor <16 x i64> undef, undef ret void } + +define void @vi128() { +; CHECK-LABEL: 'vi128' +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %c2 = add <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %d2 = sub <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %e2 = mul <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %f2 = ashr <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %g2 = lshr <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %h2 = shl <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %i2 = and <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %j2 = or <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %k2 = xor <2 x i128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %c2 = add <2 x i128> undef, undef + %d2 = sub <2 x i128> undef, undef + %e2 = mul <2 x i128> undef, undef + %f2 = ashr <2 x i128> undef, undef + %g2 = lshr <2 x i128> undef, undef + %h2 = shl <2 x i128> undef, undef + %i2 = and <2 x i128> undef, undef + %j2 = or <2 x i128> undef, undef + %k2 = xor <2 x i128> undef, undef + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/bswap.ll b/llvm/test/Analysis/CostModel/AArch64/bswap.ll index 2df508e..43b1d25 100644 --- a/llvm/test/Analysis/CostModel/AArch64/bswap.ll +++ b/llvm/test/Analysis/CostModel/AArch64/bswap.ll @@ -44,7 +44,7 @@ define void @neon() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll index 15d67489..7f5be9f 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -43,38 +43,38 @@ define void @ext() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 15 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 15 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %r0 = sext i1 poison to i8 @@ -172,22 +172,22 @@ define void @trunc() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %r8 = trunc i8 poison to i1 @@ -275,34 +275,34 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r82 = fptrunc <4 x double> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r83 = fptrunc <8 x double> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r84 = fptrunc <16 x double> poison to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %truncf64f16 = fptrunc double poison to half -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f64f16 = fptrunc <2 x double> poison to <2 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f64f16 = fptrunc <4 x double> poison to <4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f64f16 = fptrunc <8 x double> poison to <8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f64f16 = fptrunc <16 x double> poison to <16 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %truncv2f64f16 = fptrunc <2 x double> poison to <2 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %truncv4f64f16 = fptrunc <4 x double> poison to <4 x half> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %truncv8f64f16 = fptrunc <8 x double> poison to <8 x half> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %truncv16f64f16 = fptrunc <16 x double> poison to <16 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %truncv32f16 = fptrunc float poison to half ; CHECK-NEXT: Cost Model: Found costs of 1 for: %truncv2f32f16 = fptrunc <2 x float> poison to <2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %truncv4f32f16 = fptrunc <4 x float> poison to <4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f32f16 = fptrunc <8 x float> poison to <8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f32f16 = fptrunc <16 x float> poison to <16 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %truncv8f32f16 = fptrunc <8 x float> poison to <8 x half> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %truncv16f32f16 = fptrunc <16 x float> poison to <16 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r85 = fpext float poison to double ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r87 = fpext <4 x float> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r88 = fpext <8 x float> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r89 = fpext <16 x float> poison to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %extf16f32 = fpext half poison to float ; CHECK-NEXT: Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x half> poison to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x half> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x half> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x half> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %extv8f16f32 = fpext <8 x half> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %extv16f16f32 = fpext <16 x half> poison to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %extf16f64 = fpext half poison to double -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x half> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x half> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x half> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x half> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %extv2f16f64 = fpext <2 x half> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %extv4f16f64 = fpext <4 x half> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %extv8f16f64 = fpext <8 x half> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %extv16f16f64 = fpext <16 x half> poison to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8> @@ -311,158 +311,158 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r98 = fptoui <2 x float> poison to <2 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r99 = fptosi <2 x float> poison to <2 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r100 = fptoui <2 x double> poison to <2 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r101 = fptosi <2 x double> poison to <2 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r102 = fptoui <2 x double> poison to <2 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r103 = fptosi <2 x double> poison to <2 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r104 = fptoui <2 x double> poison to <2 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r105 = fptosi <2 x double> poison to <2 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r106 = fptoui <2 x double> poison to <2 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r107 = fptosi <2 x double> poison to <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16> +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r110 = fptoui <4 x float> poison to <4 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r111 = fptosi <4 x float> poison to <4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r112 = fptoui <4 x float> poison to <4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r113 = fptosi <4 x float> poison to <4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r114 = fptoui <4 x float> poison to <4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r115 = fptosi <4 x float> poison to <4 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r118 = fptoui <4 x float> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r119 = fptosi <4 x float> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r120 = fptoui <4 x double> poison to <4 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r121 = fptosi <4 x double> poison to <4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r122 = fptoui <4 x double> poison to <4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r123 = fptosi <4 x double> poison to <4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r124 = fptoui <4 x double> poison to <4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r125 = fptosi <4 x double> poison to <4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r126 = fptoui <4 x double> poison to <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r127 = fptosi <4 x double> poison to <4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r128 = fptoui <4 x double> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r129 = fptosi <4 x double> poison to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r130 = fptoui <8 x float> poison to <8 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r131 = fptosi <8 x float> poison to <8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r132 = fptoui <8 x float> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r133 = fptosi <8 x float> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r134 = fptoui <8 x float> poison to <8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r135 = fptosi <8 x float> poison to <8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r136 = fptoui <8 x float> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r137 = fptosi <8 x float> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r138 = fptoui <8 x float> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r139 = fptosi <8 x float> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r140 = fptoui <8 x double> poison to <8 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r141 = fptosi <8 x double> poison to <8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r142 = fptoui <8 x double> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r143 = fptosi <8 x double> poison to <8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r144 = fptoui <8 x double> poison to <8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r145 = fptosi <8 x double> poison to <8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r146 = fptoui <8 x double> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r147 = fptosi <8 x double> poison to <8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r148 = fptoui <8 x double> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r149 = fptosi <8 x double> poison to <8 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:83 CodeSize:51 Lat:83 SizeLat:83 for: %r150 = fptoui <16 x float> poison to <16 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:83 CodeSize:51 Lat:83 SizeLat:83 for: %r151 = fptosi <16 x float> poison to <16 x i1> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r152 = fptoui <16 x float> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %r153 = fptosi <16 x float> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r154 = fptoui <16 x float> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r155 = fptosi <16 x float> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r156 = fptoui <16 x float> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r157 = fptosi <16 x float> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %r158 = fptoui <16 x float> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %r159 = fptosi <16 x float> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of RThru:87 CodeSize:55 Lat:87 SizeLat:87 for: %r160 = fptoui <16 x double> poison to <16 x i1> +; CHECK-NEXT: Cost Model: Found costs of RThru:87 CodeSize:55 Lat:87 SizeLat:87 for: %r161 = fptosi <16 x double> poison to <16 x i1> +; CHECK-NEXT: Cost Model: Found costs of 23 for: %r162 = fptoui <16 x double> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 23 for: %r163 = fptosi <16 x double> poison to <16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 22 for: %r164 = fptoui <16 x double> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 22 for: %r165 = fptosi <16 x double> poison to <16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %r166 = fptoui <16 x double> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %r167 = fptosi <16 x double> poison to <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r168 = fptoui <16 x double> poison to <16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r169 = fptosi <16 x double> poison to <16 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r172 = uitofp <2 x i8> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r173 = sitofp <2 x i8> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r174 = uitofp <2 x i16> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r175 = sitofp <2 x i16> poison to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r178 = uitofp <2 x i64> poison to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r179 = sitofp <2 x i64> poison to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r182 = uitofp <2 x i8> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r183 = sitofp <2 x i8> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r184 = uitofp <2 x i16> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r185 = sitofp <2 x i16> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r186 = uitofp <2 x i32> poison to <2 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r187 = sitofp <2 x i32> poison to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r192 = uitofp <4 x i8> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r193 = sitofp <4 x i8> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r194 = uitofp <4 x i16> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r195 = sitofp <4 x i16> poison to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i16> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i16> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i16> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i16> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:18 SizeLat:18 for: %r198 = uitofp <4 x i64> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:18 SizeLat:18 for: %r199 = sitofp <4 x i64> poison to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r200 = uitofp <4 x i1> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r201 = sitofp <4 x i1> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 9 for: %r202 = uitofp <4 x i8> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 9 for: %r203 = sitofp <4 x i8> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 9 for: %r204 = uitofp <4 x i16> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 9 for: %r205 = sitofp <4 x i16> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r206 = uitofp <4 x i32> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r207 = sitofp <4 x i32> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r208 = uitofp <4 x i64> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r209 = sitofp <4 x i64> poison to <4 x double> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r210 = uitofp <8 x i1> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r211 = sitofp <8 x i1> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r212 = uitofp <8 x i8> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %r213 = sitofp <8 x i8> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r214 = uitofp <8 x i16> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r215 = sitofp <8 x i16> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r216 = uitofp <8 x i32> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r217 = sitofp <8 x i32> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:22 Lat:36 SizeLat:36 for: %r218 = uitofp <8 x i64> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:22 Lat:36 SizeLat:36 for: %r219 = sitofp <8 x i64> poison to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r220 = uitofp <8 x i1> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r221 = sitofp <8 x i1> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r222 = uitofp <8 x i8> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r223 = sitofp <8 x i8> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r224 = uitofp <8 x i16> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r225 = sitofp <8 x i16> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r226 = uitofp <8 x i16> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 19 for: %r227 = sitofp <8 x i16> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r228 = uitofp <8 x i64> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r229 = sitofp <8 x i64> poison to <8 x double> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r230 = uitofp <16 x i1> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r231 = sitofp <16 x i1> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 21 for: %r232 = uitofp <16 x i8> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 21 for: %r233 = sitofp <16 x i8> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r234 = uitofp <16 x i16> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r235 = sitofp <16 x i16> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r236 = uitofp <16 x i32> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r237 = sitofp <16 x i32> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:72 SizeLat:72 for: %r238 = uitofp <16 x i64> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:72 SizeLat:72 for: %r239 = sitofp <16 x i64> poison to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of 15 for: %r240 = uitofp <16 x i1> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 15 for: %r241 = sitofp <16 x i1> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 39 for: %r242 = uitofp <16 x i8> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 39 for: %r243 = sitofp <16 x i8> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 38 for: %r244 = uitofp <16 x i16> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 38 for: %r245 = sitofp <16 x i16> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 38 for: %r246 = uitofp <16 x i16> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 38 for: %r247 = sitofp <16 x i16> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r248 = uitofp <16 x i64> poison to <16 x double> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %r249 = sitofp <16 x i64> poison to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %r30 = fptoui float poison to i1 @@ -893,8 +893,8 @@ define i32 @load_extends() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v12 = sext <4 x i32> %loadv4i32 to <4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v12 = sext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %loadi8 = load i8, ptr undef @@ -1031,58 +1031,58 @@ define void @fp16cast() { ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r98 = fptoui <2 x half> poison to <2 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r99 = fptosi <2 x half> poison to <2 x i64> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r116 = fptoui <4 x half> poison to <4 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r117 = fptosi <4 x half> poison to <4 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r118 = fptoui <4 x half> poison to <4 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r119 = fptosi <4 x half> poison to <4 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r130 = fptoui <8 x half> poison to <8 x i1> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r131 = fptosi <8 x half> poison to <8 x i1> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r132 = fptoui <8 x half> poison to <8 x i8> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r136 = fptoui <8 x half> poison to <8 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r137 = fptosi <8 x half> poison to <8 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r138 = fptoui <8 x half> poison to <8 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r139 = fptosi <8 x half> poison to <8 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r150 = fptoui <16 x half> poison to <16 x i1> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r151 = fptosi <16 x half> poison to <16 x i1> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r152 = fptoui <16 x half> poison to <16 x i8> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r153 = fptosi <16 x half> poison to <16 x i8> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %r154 = fptoui <16 x half> poison to <16 x i16> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %r155 = fptosi <16 x half> poison to <16 x i16> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:82 CodeSize:50 Lat:82 SizeLat:82 for: %r156 = fptoui <16 x half> poison to <16 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:82 CodeSize:50 Lat:82 SizeLat:82 for: %r157 = fptosi <16 x half> poison to <16 x i32> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r158 = fptoui <16 x half> poison to <16 x i64> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r252 = uitofp <8 x i8> poison to <8 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r256 = uitofp <8 x i32> poison to <8 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r257 = sitofp <8 x i32> poison to <8 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 7 for: %r258 = uitofp <8 x i64> poison to <8 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 7 for: %r259 = sitofp <8 x i64> poison to <8 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r260 = uitofp <16 x i1> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r261 = sitofp <16 x i1> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %r264 = uitofp <16 x i16> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %r265 = sitofp <16 x i16> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 6 for: %r266 = uitofp <16 x i32> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 6 for: %r267 = sitofp <16 x i32> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 14 for: %r268 = uitofp <16 x i64> poison to <16 x half> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 14 for: %r269 = sitofp <16 x i64> poison to <16 x half> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fp16cast' @@ -1104,58 +1104,58 @@ define void @fp16cast() { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r98 = fptoui <2 x half> poison to <2 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r99 = fptosi <2 x half> poison to <2 x i64> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r116 = fptoui <4 x half> poison to <4 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r117 = fptosi <4 x half> poison to <4 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r118 = fptoui <4 x half> poison to <4 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r119 = fptosi <4 x half> poison to <4 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r130 = fptoui <8 x half> poison to <8 x i1> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r131 = fptosi <8 x half> poison to <8 x i1> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r132 = fptoui <8 x half> poison to <8 x i8> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of 4 for: %r136 = fptoui <8 x half> poison to <8 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of 4 for: %r137 = fptosi <8 x half> poison to <8 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r138 = fptoui <8 x half> poison to <8 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r139 = fptosi <8 x half> poison to <8 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r150 = fptoui <16 x half> poison to <16 x i1> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r151 = fptosi <16 x half> poison to <16 x i1> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r152 = fptoui <16 x half> poison to <16 x i8> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r153 = fptosi <16 x half> poison to <16 x i8> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r154 = fptoui <16 x half> poison to <16 x i16> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r155 = fptosi <16 x half> poison to <16 x i16> +; CHECK-FP16-NEXT: Cost Model: Found costs of 8 for: %r156 = fptoui <16 x half> poison to <16 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of 8 for: %r157 = fptosi <16 x half> poison to <16 x i32> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r158 = fptoui <16 x half> poison to <16 x i64> +; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r252 = uitofp <8 x i8> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half> -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r256 = uitofp <8 x i32> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r257 = sitofp <8 x i32> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 7 for: %r258 = uitofp <8 x i64> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 7 for: %r259 = sitofp <8 x i64> poison to <8 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r260 = uitofp <16 x i1> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 3 for: %r261 = sitofp <16 x i1> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 4 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 4 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r264 = uitofp <16 x i16> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %r265 = sitofp <16 x i16> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 6 for: %r266 = uitofp <16 x i32> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 6 for: %r267 = sitofp <16 x i32> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 14 for: %r268 = uitofp <16 x i64> poison to <16 x half> +; CHECK-FP16-NEXT: Cost Model: Found costs of 14 for: %r269 = sitofp <16 x i64> poison to <16 x half> ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %r30 = fptoui half poison to i1 @@ -1242,46 +1242,46 @@ define void @bf16cast() { ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat poison to float ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> poison to <2 x float> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> poison to <4 x float> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat poison to double -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f32 = fptrunc float poison to bfloat -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:30 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f64 = fptrunc double poison to bfloat -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat> -; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 4 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %extf16f64 = fpext bfloat poison to double +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 2 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 3 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 6 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 12 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 8 for: %truncf16f32 = fptrunc float poison to bfloat +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 8 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 8 for: %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 15 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 30 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 9 for: %truncf16f64 = fptrunc double poison to bfloat +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 9 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 10 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 19 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found costs of 38 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat> ; CHECK-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'bf16cast' ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat poison to float ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> poison to <2 x float> ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> poison to <4 x float> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat poison to double -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double> +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float> +; CHECK-BF16-NEXT: Cost Model: Found costs of 4 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float> +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %extf16f64 = fpext bfloat poison to double +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double> +; CHECK-BF16-NEXT: Cost Model: Found costs of 3 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double> +; CHECK-BF16-NEXT: Cost Model: Found costs of 6 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double> +; CHECK-BF16-NEXT: Cost Model: Found costs of 12 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double> ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %truncf16f32 = fptrunc float poison to bfloat -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 8 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat> ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 4 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat> ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %truncf16f64 = fptrunc double poison to bfloat -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 3 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 6 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 12 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat> ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %extf16f32 = fpext bfloat poison to float diff --git a/llvm/test/Analysis/CostModel/AArch64/clmul.ll b/llvm/test/Analysis/CostModel/AArch64/clmul.ll new file mode 100644 index 0000000..3e5fa09 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/clmul.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-- | FileCheck %s + +define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b32, i8 %a8, i8 %b8) { +; CHECK-LABEL: 'clmul' +; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128) +; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64) +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128) + %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64) + %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32) + %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8) + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll index f0e64b5..3a6235a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -17,10 +17,10 @@ define void @cmps() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cf32 = fcmp ogt float undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cf64 = fcmp ogt double undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %cbf64 = fcmp ogt bfloat undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cfv816 = fcmp olt <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %cfv816 = fcmp olt <8 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cfv432 = fcmp oge <4 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:1 for: %cfv264 = fcmp oge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c8 = icmp slt i8 undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll index 15d09e00..3bd929d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll @@ -8,42 +8,42 @@ define void @foo_no_vscale_range() { ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:37 Lat:37 SizeLat:37 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %res.i64.v32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %res.i32.v32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:37 Lat:37 SizeLat:37 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.v16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %res.i64.v32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.v16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:9 Lat:9 SizeLat:9 for: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:10 Lat:10 SizeLat:10 for: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true) @@ -101,22 +101,22 @@ define void @foo_vscale_range_1_16() vscale_range(1,16) { ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:18 SizeLat:18 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:18 SizeLat:18 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:18 SizeLat:18 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:13 Lat:13 SizeLat:13 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:18 Lat:18 SizeLat:18 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true) @@ -150,22 +150,22 @@ define void @foo_vscale_range_1_16384() vscale_range(1,16384) { ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i64.nxv32i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 true) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i32.nxv32i1.zip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv2i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv4i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv8i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i64.nxv16i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i64.nxv32i1.nzip = call i64 @llvm.experimental.cttz.elts.i64.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv2i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv4i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv8i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %res.i32.nxv16i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> undef, i1 false) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:21 Lat:21 SizeLat:21 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %res.i32.nxv32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.nxv32i1(<vscale x 32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true) diff --git a/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll new file mode 100644 index 0000000..9efcf91 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/extract-last-active.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=NEON +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=SVE +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme -force-streaming | FileCheck %s --check-prefix=SME-STREAMING + +define void @extractions() { +; NEON-LABEL: 'extractions' +; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; NEON-NEXT: Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SVE-LABEL: 'extractions' +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SME-STREAMING-LABEL: 'extractions' +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) +; SME-STREAMING-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + + ;; Legal types + %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison) + %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison) + %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison) + %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison) + %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison) + %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison) + %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison) + %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison) + %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison) + %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison) + %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison) + %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison) + %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison) + %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison) + %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison) + %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison) + + ;; Wider-than-legal + %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison) + %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison) + %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison) + %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison) + %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison) + %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison) + %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison) + %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison) + %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison) + %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison) + %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison) + %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison) + %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison) + %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison) + %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison) + %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison) + + ;; Narrower-than-legal + %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison) + %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison) + %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison) + %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison) + %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison) + %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison) + %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison) + %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison) + %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison) + %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison) + %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison) + %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison) + %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison) + %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison) + %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison) + %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll index c5ec319..6900d47 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll @@ -32,8 +32,8 @@ define void @fcmp_oeq_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp oeq half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp oeq <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp oeq <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oeq <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp oeq <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp oeq <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp oeq <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_oeq_half' @@ -57,8 +57,8 @@ define void @fcmp_oeq_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp oeq bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp oeq bfloat undef, undef @@ -99,8 +99,8 @@ define void @fcmp_ogt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ogt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ogt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ogt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ogt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ogt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp ogt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp ogt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ogt_half' @@ -124,8 +124,8 @@ define void @fcmp_ogt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ogt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ogt bfloat undef, undef @@ -166,8 +166,8 @@ define void @fcmp_oge_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp oge half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp oge <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp oge <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oge <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp oge <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp oge <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp oge <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_oge_half' @@ -191,8 +191,8 @@ define void @fcmp_oge_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp oge bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp oge bfloat undef, undef @@ -233,8 +233,8 @@ define void @fcmp_olt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp olt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp olt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp olt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp olt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp olt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_olt_half' @@ -258,8 +258,8 @@ define void @fcmp_olt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp olt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp olt bfloat undef, undef @@ -300,8 +300,8 @@ define void @fcmp_ole_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ole half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ole <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ole <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ole <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ole <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp ole <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp ole <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ole_half' @@ -325,8 +325,8 @@ define void @fcmp_ole_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ole bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ole bfloat undef, undef @@ -367,8 +367,8 @@ define void @fcmp_one_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %f16 = fcmp one half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp one <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp one <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp one <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp one <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 11 for: %v8f16 = fcmp one <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16f16 = fcmp one <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_one_half' @@ -392,8 +392,8 @@ define void @fcmp_one_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %bf16 = fcmp one bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp one bfloat undef, undef @@ -434,8 +434,8 @@ define void @fcmp_ord_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ord half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp ord <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp ord <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp ord <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp ord <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 11 for: %v8f16 = fcmp ord <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16f16 = fcmp ord <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ord_half' @@ -459,8 +459,8 @@ define void @fcmp_ord_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ord bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ord bfloat undef, undef @@ -501,8 +501,8 @@ define void @fcmp_ueq_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %f16 = fcmp ueq half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp ueq <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp ueq <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp ueq <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp ueq <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 11 for: %v8f16 = fcmp ueq <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16f16 = fcmp ueq <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ueq_half' @@ -526,8 +526,8 @@ define void @fcmp_ueq_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:6 SizeLat:4 for: %bf16 = fcmp ueq bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ueq bfloat undef, undef @@ -568,8 +568,8 @@ define void @fcmp_ugt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ugt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ugt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ugt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ugt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ugt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp ugt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp ugt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ugt_half' @@ -593,8 +593,8 @@ define void @fcmp_ugt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ugt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ugt bfloat undef, undef @@ -635,8 +635,8 @@ define void @fcmp_uge_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp uge half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp uge <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp uge <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uge <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp uge <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp uge <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp uge <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_uge_half' @@ -660,8 +660,8 @@ define void @fcmp_uge_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp uge bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp uge bfloat undef, undef @@ -702,8 +702,8 @@ define void @fcmp_ult_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ult half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ult <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ult <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ult <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ult <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp ult <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp ult <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ult_half' @@ -727,8 +727,8 @@ define void @fcmp_ult_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ult bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ult bfloat undef, undef @@ -769,8 +769,8 @@ define void @fcmp_ule_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp ule half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp ule <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp ule <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ule <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp ule <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp ule <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp ule <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ule_half' @@ -794,8 +794,8 @@ define void @fcmp_ule_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp ule bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ule bfloat undef, undef @@ -836,8 +836,8 @@ define void @fcmp_une_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp une half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp une <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp une <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp une <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp une <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp une <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp une <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_une_half' @@ -861,8 +861,8 @@ define void @fcmp_une_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp une bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp une bfloat undef, undef @@ -903,8 +903,8 @@ define void @fcmp_uno_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp uno half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2f16 = fcmp uno <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4f16 = fcmp uno <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8f16 = fcmp uno <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16f16 = fcmp uno <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 11 for: %v8f16 = fcmp uno <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16f16 = fcmp uno <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_uno_half' @@ -928,8 +928,8 @@ define void @fcmp_uno_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp uno bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:6 Lat:9 SizeLat:6 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:9 SizeLat:15 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:16 SizeLat:22 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp uno bfloat undef, undef @@ -970,8 +970,8 @@ define void @fcmp_true_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp true half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp true <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp true <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp true <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp true <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp true <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp true <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_true_half' @@ -995,8 +995,8 @@ define void @fcmp_true_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp true bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp true bfloat undef, undef @@ -1037,8 +1037,8 @@ define void @fcmp_false_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %f16 = fcmp false half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2f16 = fcmp false <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4f16 = fcmp false <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp false <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16f16 = fcmp false <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of 7 for: %v8f16 = fcmp false <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16f16 = fcmp false <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_false_half' @@ -1062,8 +1062,8 @@ define void @fcmp_false_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %bf16 = fcmp false bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:5 SizeLat:4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 7 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:14 Lat:12 SizeLat:14 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp false bfloat undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/fp-conversions-odd-vector-types.ll b/llvm/test/Analysis/CostModel/AArch64/fp-conversions-odd-vector-types.ll index 16aea89..d588cf2 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fp-conversions-odd-vector-types.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fp-conversions-odd-vector-types.ll @@ -12,7 +12,7 @@ define <3 x i32> @fptoui_v3f32_to_v3i32(<3 x float> %in, ptr %dst) { define <5 x i32> @fptoui_v5f32_to_v5i32(<5 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptoui_v5f32_to_v5i32' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptoui <5 x float> %in to <5 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %conv = fptoui <5 x float> %in to <5 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <5 x i32> %conv ; %conv = fptoui <5 x float> %in to <5 x i32> @@ -21,7 +21,7 @@ define <5 x i32> @fptoui_v5f32_to_v5i32(<5 x float> %in, ptr %dst) { define <3 x i16> @fptoui_v3f32_to_v3i16(<3 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptoui_v3f32_to_v3i16' -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptoui <3 x float> %in to <3 x i16> +; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:15 SizeLat:15 for: %conv = fptoui <3 x float> %in to <3 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i16> %conv ; %conv = fptoui <3 x float> %in to <3 x i16> @@ -30,7 +30,7 @@ define <3 x i16> @fptoui_v3f32_to_v3i16(<3 x float> %in, ptr %dst) { define <5 x i16> @fptoui_v5f32_to_v5i16(<5 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptoui_v5f32_to_v5i16' -; CHECK-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptoui <5 x float> %in to <5 x i16> +; CHECK-NEXT: Cost Model: Found costs of RThru:25 CodeSize:15 Lat:25 SizeLat:25 for: %conv = fptoui <5 x float> %in to <5 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <5 x i16> %conv ; %conv = fptoui <5 x float> %in to <5 x i16> @@ -39,7 +39,7 @@ define <5 x i16> @fptoui_v5f32_to_v5i16(<5 x float> %in, ptr %dst) { define <3 x i8> @fptoui_v3f32_to_v3i8(<3 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptoui_v3f32_to_v3i8' -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptoui <3 x float> %in to <3 x i8> +; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:15 SizeLat:15 for: %conv = fptoui <3 x float> %in to <3 x i8> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i8> %conv ; %conv = fptoui <3 x float> %in to <3 x i8> @@ -48,7 +48,7 @@ define <3 x i8> @fptoui_v3f32_to_v3i8(<3 x float> %in, ptr %dst) { define <9 x i8> @fptoui_v9f32_to_v9i8(<9 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptoui_v9f32_to_v9i8' -; CHECK-NEXT: Cost Model: Found costs of RThru:45 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptoui <9 x float> %in to <9 x i8> +; CHECK-NEXT: Cost Model: Found costs of RThru:45 CodeSize:27 Lat:45 SizeLat:45 for: %conv = fptoui <9 x float> %in to <9 x i8> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <9 x i8> %conv ; %conv = fptoui <9 x float> %in to <9 x i8> @@ -66,7 +66,7 @@ define <3 x i32> @fptosi_v3f32_to_v3i32(<3 x float> %in, ptr %dst) { define <6 x i32> @fptosi_v6f32_to_v6i32(<6 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptosi_v6f32_to_v6i32' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptosi <6 x float> %in to <6 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %conv = fptosi <6 x float> %in to <6 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <6 x i32> %conv ; %conv = fptosi <6 x float> %in to <6 x i32> @@ -75,7 +75,7 @@ define <6 x i32> @fptosi_v6f32_to_v6i32(<6 x float> %in, ptr %dst) { define <3 x i16> @fptosi_v3f32_to_v3i16(<3 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptosi_v3f32_to_v3i16' -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptosi <3 x float> %in to <3 x i16> +; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:15 SizeLat:15 for: %conv = fptosi <3 x float> %in to <3 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i16> %conv ; %conv = fptosi <3 x float> %in to <3 x i16> @@ -84,7 +84,7 @@ define <3 x i16> @fptosi_v3f32_to_v3i16(<3 x float> %in, ptr %dst) { define <6 x i16> @fptosi_v6f32_to_v6i16(<6 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptosi_v6f32_to_v6i16' -; CHECK-NEXT: Cost Model: Found costs of RThru:30 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptosi <6 x float> %in to <6 x i16> +; CHECK-NEXT: Cost Model: Found costs of RThru:30 CodeSize:18 Lat:30 SizeLat:30 for: %conv = fptosi <6 x float> %in to <6 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <6 x i16> %conv ; %conv = fptosi <6 x float> %in to <6 x i16> @@ -93,7 +93,7 @@ define <6 x i16> @fptosi_v6f32_to_v6i16(<6 x float> %in, ptr %dst) { define <3 x i8> @fptosi_v3f32_to_v3i8(<3 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptosi_v3f32_to_v3i8' -; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptosi <3 x float> %in to <3 x i8> +; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:15 SizeLat:15 for: %conv = fptosi <3 x float> %in to <3 x i8> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i8> %conv ; %conv = fptosi <3 x float> %in to <3 x i8> @@ -102,7 +102,7 @@ define <3 x i8> @fptosi_v3f32_to_v3i8(<3 x float> %in, ptr %dst) { define <5 x i8> @fptosi_v5f32_to_v5i8(<5 x float> %in, ptr %dst) { ; CHECK-LABEL: 'fptosi_v5f32_to_v5i8' -; CHECK-NEXT: Cost Model: Found costs of RThru:25 CodeSize:1 Lat:1 SizeLat:1 for: %conv = fptosi <5 x float> %in to <5 x i8> +; CHECK-NEXT: Cost Model: Found costs of RThru:25 CodeSize:15 Lat:25 SizeLat:25 for: %conv = fptosi <5 x float> %in to <5 x i8> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <5 x i8> %conv ; %conv = fptosi <5 x float> %in to <5 x i8> @@ -120,7 +120,7 @@ define <3 x float> @uitofp_v3i32_to_v3f32(<3 x i32> %in, ptr %dst) { define <5 x float> @uitofp_v5i32_to_v5f32(<5 x i32> %in, ptr %dst) { ; CHECK-LABEL: 'uitofp_v5i32_to_v5f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <5 x i32> %in to <5 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %conv = uitofp <5 x i32> %in to <5 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <5 x float> %conv ; %conv = uitofp <5 x i32> %in to <5 x float> @@ -138,7 +138,7 @@ define <3 x float> @uitofp_v3i16_to_v3f32(<3 x i16> %in, ptr %dst) { define <7 x float> @uitofp_v7i16_to_v7f32(<7 x i16> %in, ptr %dst) { ; CHECK-LABEL: 'uitofp_v7i16_to_v7f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <7 x i16> %in to <7 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:17 Lat:27 SizeLat:27 for: %conv = uitofp <7 x i16> %in to <7 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <7 x float> %conv ; %conv = uitofp <7 x i16> %in to <7 x float> @@ -156,7 +156,7 @@ define <3 x float> @uitofp_v3i8_to_v3f32(<3 x i8> %in, ptr %dst) { define <11 x float> @uitofp_v11i8_to_v11f32(<11 x i8> %in, ptr %dst) { ; CHECK-LABEL: 'uitofp_v11i8_to_v11f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <11 x i8> %in to <11 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %conv = uitofp <11 x i8> %in to <11 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <11 x float> %conv ; %conv = uitofp <11 x i8> %in to <11 x float> @@ -174,7 +174,7 @@ define <3 x float> @sitofp_v3i32_to_v3f32(<3 x i32> %in, ptr %dst) { define <7 x float> @sitofp_v7i32_to_v7f32(<7 x i32> %in, ptr %dst) { ; CHECK-LABEL: 'sitofp_v7i32_to_v7f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %conv = sitofp <7 x i32> %in to <7 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %conv = sitofp <7 x i32> %in to <7 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <7 x float> %conv ; %conv = sitofp <7 x i32> %in to <7 x float> @@ -192,7 +192,7 @@ define <3 x float> @sitofp_v3i16_to_v3f32(<3 x i16> %in, ptr %dst) { define <6 x float> @sitofp_v6i16_to_v6f32(<6 x i16> %in, ptr %dst) { ; CHECK-LABEL: 'sitofp_v6i16_to_v6f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %conv = sitofp <6 x i16> %in to <6 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:14 Lat:22 SizeLat:22 for: %conv = sitofp <6 x i16> %in to <6 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <6 x float> %conv ; %conv = sitofp <6 x i16> %in to <6 x float> @@ -210,7 +210,7 @@ define <3 x float> @sitofp_v3i8_to_v3f32(<3 x i8> %in, ptr %dst) { define <8 x float> @sitofp_v8i8_to_v8f32(<8 x i8> %in, ptr %dst) { ; CHECK-LABEL: 'sitofp_v8i8_to_v8f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %conv = sitofp <8 x i8> %in to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %conv = sitofp <8 x i8> %in to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %conv ; %conv = sitofp <8 x i8> %in to <8 x float> @@ -219,7 +219,7 @@ define <8 x float> @sitofp_v8i8_to_v8f32(<8 x i8> %in, ptr %dst) { define <7 x float> @sitofp_v7i8_to_v7f32(<7 x i8> %in, ptr %dst) { ; CHECK-LABEL: 'sitofp_v7i8_to_v7f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:1 SizeLat:1 for: %conv = sitofp <7 x i8> %in to <7 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:17 Lat:27 SizeLat:27 for: %conv = sitofp <7 x i8> %in to <7 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <7 x float> %conv ; %conv = sitofp <7 x i8> %in to <7 x float> diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index 9d06b4b..61296a8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) { ; CHECK-LABEL: 'fshl_i8_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) - ret i8 %fshl + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9) + ret i8 %r } define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshl_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) - ret i8 %fshl + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) + ret i8 %r } -declare i8 @llvm.fshl.i8(i8, i8, i8) - -define i16 @fshl_i16(i16 %a, i16 %b) { -; CHECK-LABEL: 'fshl_i16' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshl +define i16 @fshl_i16_3rd_arg_const(i16 %a, i16 %b) { +; CHECK-LABEL: 'fshl_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r ; entry: - %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) - ret i16 %fshl + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9) + ret i16 %r } -declare i16 @llvm.fshl.i16(i16, i16, i16) +define i16 @fshl_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: 'fshl_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c) + ret i16 %r +} define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) { ; CHECK-LABEL: 'fshl_i32_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) - ret i32 %fshl + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9) + ret i32 %r } define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshl_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - ret i32 %fshl + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + ret i32 %r } -declare i32 @llvm.fshl.i32(i32, i32, i32) - define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) { ; CHECK-LABEL: 'fshl_i64_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) - ret i64 %fshl + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9) + ret i64 %r } define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshl_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) + ret i64 %r +} + +define i128 @fshl_i128_3rd_arg_const(i128 %a, i128 %b) { +; CHECK-LABEL: 'fshl_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) - ret i64 %fshl + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) + ret i128 %r } -declare i64 @llvm.fshl.i64(i64, i64, i64) +define i128 @fshl_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) { +; CHECK-LABEL: 'fshl_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c) + ret i128 %r +} define i19 @fshl_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshl_i19' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r ; entry: - %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) - ret i19 %fshl + %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9) + ret i19 %r } -declare i19 @llvm.fshl.i19(i19, i19, i19) +define i66 @fshl_i66(i66 %a, i66 %b) { +; CHECK-LABEL: 'fshl_i66' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r +; +entry: + %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) + ret i66 %r +} define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %r } define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) + ret <16 x i8> %r } define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %fshl + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %r } -declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %r } define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) + ret <8 x i16> %r } define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) - ret <8 x i16> %fshl + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> %r } -declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %r } define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) + ret <4 x i32> %r } define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) - ret <4 x i32> %fshl + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %r } -declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>) + ret <2 x i64> %r } define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) + ret <2 x i64> %r } define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) - ret <2 x i64> %fshl + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %r } -declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>) - define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) { ; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r ; entry: - %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) - ret <4 x i30> %fshl + %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) + ret <4 x i30> %r } -declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) - define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r ; entry: - %fshl = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) - ret <2 x i66> %fshl + %r = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) + ret <2 x i66> %r } -declare <2 x i66> @llvm.fshl.v4i66(<2 x i66>, <2 x i66>, <2 x i66>) -define i66 @fshl_i66(i66 %a, i66 %b) { -; CHECK-LABEL: 'fshl_i66' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshl +define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) { +; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r ; entry: - %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9) - ret i66 %fshl + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 1>) + ret <2 x i128> %r } -declare i66 @llvm.fshl.i66(i66, i66, i66) - define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) + ret <2 x i128> %r +} + +define <2 x i128> @fshl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { +; CHECK-LABEL: 'fshl_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) + ret <2 x i128> %r +} + + +; Rotate tests + +define i8 @rotl_i8_3rd_arg_const(i8 %a) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9) + ret i8 %r +} + +define i8 @rotl_i8_3rd_arg_var(i8 %a, i8 %c) { +; CHECK-LABEL: 'rotl_i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 %c) + ret i8 %r +} + +define i16 @rotl_i16_3rd_arg_const(i16 %a) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9) + ret i16 %r +} + +define i16 @rotl_i16_3rd_arg_var(i16 %a, i16 %c) { +; CHECK-LABEL: 'rotl_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c) + ret i16 %r +} + +define i32 @rotl_i32_3rd_arg_const(i32 %a) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) { +; CHECK-LABEL: 'rotl_i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotl_i64_3rd_arg_const(i64 %a) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) { +; CHECK-LABEL: 'rotl_i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshl = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) - ret <2 x i128> %fshl + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r } -declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) -define i128 @fshl_i128(i128 %a, i128 %b) { -; CHECK-LABEL: 'fshl_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl +define i128 @rotl_i128_3rd_arg_const(i128 %a) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) - ret i128 %fshl + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9) + ret i128 %r } -declare i128 @llvm.fshl.i128(i128, i128, i128) +define i128 @rotl_i128_3rd_arg_var(i128 %a, i128 %c) { +; CHECK-LABEL: 'rotl_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c) + ret i128 %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) + ret <16 x i8> %r +} + +define <16 x i8> @rotl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) { +; CHECK-LABEL: 'rotl_v16i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) + ret <16 x i8> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) + ret <8 x i16> %r +} + +define <8 x i16> @rotl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) { +; CHECK-LABEL: 'rotl_v8i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) + ret <8 x i16> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) + ret <4 x i32> %r +} + +define <4 x i32> @rotl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) { +; CHECK-LABEL: 'rotl_v4i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) + ret <4 x i32> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 1>) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>) + ret <2 x i64> %r +} + +define <2 x i64> @rotl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) { +; CHECK-LABEL: 'rotl_v2i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) + ret <2 x i64> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 1>) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>) + ret <2 x i128> %r +} + +define <2 x i128> @rotl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) { +; CHECK-LABEL: 'rotl_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) + ret <2 x i128> %r +} diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index b31806b..1aa6de9 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) - ret i8 %fshr + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9) + ret i8 %r } define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r ; entry: - %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) - ret i8 %fshr + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) + ret i8 %r } -declare i8 @llvm.fshr.i8(i8, i8, i8) - -define i16 @fshr_i16(i16 %a, i16 %b) { -; CHECK-LABEL: 'fshr_i16' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshr +define i16 @fshr_i16_3rd_arg_const(i16 %a, i16 %b) { +; CHECK-LABEL: 'fshr_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r ; entry: - %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) - ret i16 %fshr + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9) + ret i16 %r } -declare i16 @llvm.fshr.i16(i16, i16, i16) +define i16 @fshr_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) { +; CHECK-LABEL: 'fshr_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c) + ret i16 %r +} define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) - ret i32 %fshr + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9) + ret i32 %r } define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r ; entry: - %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) - ret i32 %fshr + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) + ret i32 %r } -declare i32 @llvm.fshr.i32(i32, i32, i32) - define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_const' -; CHECK-NEXT: Cost Model: Found costs of 1 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) - ret i64 %fshr + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9) + ret i64 %r } define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) + ret i64 %r +} + +define i128 @fshr_i128_3rd_arg_const(i128 %a, i128 %b) { +; CHECK-LABEL: 'fshr_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) - ret i64 %fshr + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) + ret i128 %r } -declare i64 @llvm.fshr.i64(i64, i64, i64) +define i128 @fshr_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) { +; CHECK-LABEL: 'fshr_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c) + ret i128 %r +} define i19 @fshr_i19(i19 %a, i19 %b) { ; CHECK-LABEL: 'fshr_i19' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r ; entry: - %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) - ret i19 %fshr + %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9) + ret i19 %r } -declare i19 @llvm.fshr.i19(i19, i19, i19) +define i66 @fshr_i66(i66 %a, i66 %b) { +; CHECK-LABEL: 'fshr_i66' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r +; +entry: + %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) + ret i66 %r +} define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %r } define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) + ret <16 x i8> %r } define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r ; entry: - %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %fshr + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %r } -declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %r } define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) + ret <8 x i16> %r } define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r ; entry: - %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) - ret <8 x i16> %fshr + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> %r } -declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %r } define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) + ret <4 x i32> %r } define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r ; entry: - %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) - ret <4 x i32> %fshr + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %r } -declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found costs of 2 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>) + ret <2 x i64> %r } define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of 6 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 6 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>) + ret <2 x i64> %r } define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of 7 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr +; CHECK-NEXT: Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r ; entry: - %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) - ret <2 x i64> %fshr + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %r } -declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>) - define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) { ; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r ; entry: - %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) - ret <4 x i30> %fshr + %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) + ret <4 x i30> %r } -declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) - define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r ; entry: - %fshr = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) - ret <2 x i66> %fshr + %r = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>) + ret <2 x i66> %r } -declare <2 x i66> @llvm.fshr.v4i66(<2 x i66>, <2 x i66>, <2 x i66>) -define i66 @fshr_i66(i66 %a, i66 %b) { -; CHECK-LABEL: 'fshr_i66' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshr +define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) { +; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r ; entry: - %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9) - ret i66 %fshr + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 1>) + ret <2 x i128> %r } -declare i66 @llvm.fshr.i66(i66, i66, i66) - define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) + ret <2 x i128> %r +} + +define <2 x i128> @fshr_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) { +; CHECK-LABEL: 'fshr_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) + ret <2 x i128> %r +} + + +; Rotate tests + +define i8 @rotr_i8_3rd_arg_const(i8 %a) { +; CHECK-LABEL: 'rotr_i8_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9) + ret i8 %r +} + +define i8 @rotr_i8_3rd_arg_var(i8 %a, i8 %c) { +; CHECK-LABEL: 'rotr_i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r +; +entry: + %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c) + ret i8 %r +} + +define i16 @rotr_i16_3rd_arg_const(i16 %a) { +; CHECK-LABEL: 'rotr_i16_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9) + ret i16 %r +} + +define i16 @rotr_i16_3rd_arg_var(i16 %a, i16 %c) { +; CHECK-LABEL: 'rotr_i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r +; +entry: + %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c) + ret i16 %r +} + +define i32 @rotr_i32_3rd_arg_const(i32 %a) { +; CHECK-LABEL: 'rotr_i32_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotr_i32_3rd_arg_var(i32 %a, i32 %c) { +; CHECK-LABEL: 'rotr_i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotr_i64_3rd_arg_const(i64 %a) { +; CHECK-LABEL: 'rotr_i64_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotr_i64_3rd_arg_var(i64 %a, i64 %c) { +; CHECK-LABEL: 'rotr_i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r ; entry: - %fshr = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>) - ret <2 x i128> %fshr + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r } -declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) -define i128 @fshr_i128(i128 %a, i128 %b) { -; CHECK-LABEL: 'fshr_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr +define i128 @rotr_i128_3rd_arg_const(i128 %a) { +; CHECK-LABEL: 'rotr_i128_3rd_arg_const' +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r ; entry: - %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) - ret i128 %fshr + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9) + ret i128 %r } -declare i128 @llvm.fshr.i128(i128, i128, i128) +define i128 @rotr_i128_3rd_arg_var(i128 %a, i128 %c) { +; CHECK-LABEL: 'rotr_i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r +; +entry: + %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c) + ret i128 %r +} + +define <16 x i8> @rotr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a) { +; CHECK-LABEL: 'rotr_v16i8_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %r +} + +define <16 x i8> @rotr_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) { +; CHECK-LABEL: 'rotr_v16i8_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>) + ret <16 x i8> %r +} + +define <16 x i8> @rotr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) { +; CHECK-LABEL: 'rotr_v16i8_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r +; +entry: + %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c) + ret <16 x i8> %r +} + +define <8 x i16> @rotr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) { +; CHECK-LABEL: 'rotr_v8i16_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %r +} + +define <8 x i16> @rotr_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) { +; CHECK-LABEL: 'rotr_v8i16_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>) + ret <8 x i16> %r +} + +define <8 x i16> @rotr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) { +; CHECK-LABEL: 'rotr_v8i16_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r +; +entry: + %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c) + ret <8 x i16> %r +} + +define <4 x i32> @rotr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) { +; CHECK-LABEL: 'rotr_v4i32_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %r +} + +define <4 x i32> @rotr_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) { +; CHECK-LABEL: 'rotr_v4i32_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>) + ret <4 x i32> %r +} + +define <4 x i32> @rotr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) { +; CHECK-LABEL: 'rotr_v4i32_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r +; +entry: + %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c) + ret <4 x i32> %r +} + +define <2 x i64> @rotr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) { +; CHECK-LABEL: 'rotr_v2i64_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 1>) + ret <2 x i64> %r +} + +define <2 x i64> @rotr_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) { +; CHECK-LABEL: 'rotr_v2i64_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>) + ret <2 x i64> %r +} + +define <2 x i64> @rotr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) { +; CHECK-LABEL: 'rotr_v2i64_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r +; +entry: + %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c) + ret <2 x i64> %r +} + +define <2 x i128> @rotr_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a) { +; CHECK-LABEL: 'rotr_v2i128_3rd_arg_vec_const_all_lanes_same' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1)) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 1>) + ret <2 x i128> %r +} + +define <2 x i128> @rotr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) { +; CHECK-LABEL: 'rotr_v2i128_3rd_arg_vec_const_lanes_different' +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>) + ret <2 x i128> %r +} + +define <2 x i128> @rotr_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) { +; CHECK-LABEL: 'rotr_v2i128_3rd_arg_var' +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r +; +entry: + %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c) + ret <2 x i128> %r +} diff --git a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll index cef6cf1..55a14b8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll @@ -1,10 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-n1 | FileCheck %s -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-n2 | FileCheck %s -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-v1 | FileCheck %s -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-v2 | FileCheck %s -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=kryo | FileCheck %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-n1 | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-n2 | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-v1 | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=neoverse-v2 | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=kryo | FileCheck --check-prefixes=CHECK,GENERIC %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-m1 | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-m2 | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-m3 | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-m4 | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-m5 | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mcpu=apple-latest | FileCheck --check-prefixes=CHECK,FAST-LD1 %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mattr=+fast-ld1-single | FileCheck --check-prefixes=CHECK,FAST-LD1 %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @@ -84,10 +91,15 @@ define void @vectorInstrCost() { ;; LD1: Load one single-element structure to one lane of one register. define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) { -; CHECK-LABEL: 'LD1_B' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1 -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2 +; GENERIC-LABEL: 'LD1_B' +; GENERIC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1 +; GENERIC-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1 +; GENERIC-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2 +; +; FAST-LD1-LABEL: 'LD1_B' +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1 +; FAST-LD1-NEXT: Cost Model: Found costs of 0 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1 +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2 ; entry: %v1 = load i8, ptr %i, align 1 @@ -96,10 +108,15 @@ entry: } define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) { -; CHECK-LABEL: 'LD1_H' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %i, align 2 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2 -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %v2 +; GENERIC-LABEL: 'LD1_H' +; GENERIC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %i, align 2 +; GENERIC-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2 +; GENERIC-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %v2 +; +; FAST-LD1-LABEL: 'LD1_H' +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %i, align 2 +; FAST-LD1-NEXT: Cost Model: Found costs of 0 for: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2 +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %v2 ; entry: %v1 = load i16, ptr %i, align 2 @@ -108,10 +125,15 @@ entry: } define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) { -; CHECK-LABEL: 'LD1_W' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %i, align 4 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3 -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %v2 +; GENERIC-LABEL: 'LD1_W' +; GENERIC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %i, align 4 +; GENERIC-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3 +; GENERIC-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %v2 +; +; FAST-LD1-LABEL: 'LD1_W' +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %i, align 4 +; FAST-LD1-NEXT: Cost Model: Found costs of 0 for: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3 +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %v2 ; entry: %v1 = load i32, ptr %i, align 4 @@ -120,13 +142,34 @@ entry: } define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) { -; CHECK-LABEL: 'LD1_X' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i64, ptr %i, align 8 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0 -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %v2 +; GENERIC-LABEL: 'LD1_X' +; GENERIC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i64, ptr %i, align 8 +; GENERIC-NEXT: Cost Model: Found costs of RThru:3 CodeSize:0 Lat:3 SizeLat:3 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0 +; GENERIC-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %v2 +; +; FAST-LD1-LABEL: 'LD1_X' +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i64, ptr %i, align 8 +; FAST-LD1-NEXT: Cost Model: Found costs of 0 for: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0 +; FAST-LD1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %v2 ; entry: %v1 = load i64, ptr %i, align 8 %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0 ret <2 x i64> %v2 } + +;; Multi-use tests: Check that Load context is only applied when load has one use + +define <8 x i8> @LD1_multi_use_load(<8 x i8> %vec, ptr noundef %i, ptr noundef %out) { +; CHECK-LABEL: 'LD1_multi_use_load' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %i, align 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1 +; CHECK-NEXT: Cost Model: Found costs of 1 for: store i8 %v1, ptr %out, align 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %v2 +; +entry: + %v1 = load i8, ptr %i, align 1 + %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1 + store i8 %v1, ptr %out, align 1 ; Load has multiple uses + ret <8 x i8> %v2 +} diff --git a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll new file mode 100644 index 0000000..74bd41d --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-EXPANDED +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2 +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sme | FileCheck %s --check-prefixes=CHECK,CHECK-SME + +; loop.dependence.{war,raw}.mask can be lowered to while{wr,rw} if SVE2 or SME is enabled. +define void @loop_dependence_war_mask(ptr %a, ptr %b) { +; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask' +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-LABEL: 'loop_dependence_war_mask' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) + %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) + %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) + %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) + + %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) + %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) + %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) + %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) + + ret void +} + +define void @loop_dependence_raw_mask(ptr %a, ptr %b) { +; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask' +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-LABEL: 'loop_dependence_raw_mask' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) + %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) + %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) + %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) + + %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) + %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) + %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) + %res8 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) + ret void +} + +; Invalid element size and return type combinations must be expanded, even with sve2/sme +define void @loop_dependence_war_mask_invalid(ptr %a, ptr %b) { +; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask_invalid' +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SVE2-LABEL: 'loop_dependence_war_mask_invalid' +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SME-LABEL: 'loop_dependence_war_mask_invalid' +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) + %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) + %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) + %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) + %res5 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) + + %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) + %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) + %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) + %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) + %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) + ret void +} + +define void @loop_dependence_raw_mask_invalid(ptr %a, ptr %b) { +; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask_invalid' +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SVE2-LABEL: 'loop_dependence_raw_mask_invalid' +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-SME-LABEL: 'loop_dependence_raw_mask_invalid' +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 8) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 4) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 2) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 1) +; CHECK-SME-NEXT: Cost Model: Invalid cost for instruction: %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.nxv2i1(ptr %a, ptr %b, i64 10) +; CHECK-SME-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) + %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) + %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) + %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) + %res5 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) + + %res6 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) + %res7 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) + %res8 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) + %res9 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) + %res10 = call <vscale x 2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll index b4ced24..780db09 100644 --- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll @@ -114,67 +114,67 @@ entry: define void @scalable_ext_loads() { ; CHECK-LABEL: 'scalable_ext_loads' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i8to32 = zext <vscale x 8 x i8> %load.nxv8i8.2 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv8i8to32 = zext <vscale x 8 x i8> %load.nxv8i8.2 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv4i8to32 = zext <vscale x 4 x i8> %load.nxv4i8 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv4i8to64 = zext <vscale x 4 x i8> %load.nxv4i8.2 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv4i8to64 = zext <vscale x 4 x i8> %load.nxv4i8.2 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv4i16to64 = zext <vscale x 4 x i16> %load.nxv4i16.2 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv4i16to64 = zext <vscale x 4 x i16> %load.nxv4i16.2 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 8 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv8i8.2 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i8to32 = sext <vscale x 8 x i8> %load2.nxv8i8.2 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv8i8to32 = sext <vscale x 8 x i8> %load2.nxv8i8.2 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv8i8.3 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv4i8to32 = sext <vscale x 4 x i8> %load2.nxv4i8 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv4i8.2 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv4i8to64 = sext <vscale x 4 x i8> %load2.nxv4i8.2 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv4i8to64 = sext <vscale x 4 x i8> %load2.nxv4i8.2 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv4i16.2 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv4i16to64 = sext <vscale x 4 x i16> %load2.nxv4i16.2 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv4i16to64 = sext <vscale x 4 x i16> %load2.nxv4i16.2 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %load2.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll index fa53a18..1920fc9 100644 --- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll +++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst_vls.ll @@ -1,17 +1,6 @@ ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll index 9aea58e..0ec8cb5 100644 --- a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll +++ b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll @@ -7,11 +7,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @uitofp() { ; CHECK-NONEON-LABEL: 'uitofp' -; CHECK-NONEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> poison to <16 x float> +; CHECK-NONEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %conv = uitofp <16 x i64> poison to <16 x float> ; CHECK-NONEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-WITHSVE-LABEL: 'uitofp' -; CHECK-WITHSVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> poison to <16 x float> +; CHECK-WITHSVE-NEXT: Cost Model: Found costs of 16 for: %conv = uitofp <16 x i64> poison to <16 x float> ; CHECK-WITHSVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %conv = uitofp <16 x i64> poison to <16 x float> diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll index 4579acb..255877f 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -490,3 +490,15 @@ define void @vst4(ptr %p) { ret void } + +define void @identity_shuffle_costs() #0 { +bb: + ; CHECK-LABEL: 'identity_shuffle_costs' + ; CHECK: Cost Model: Found costs of 0 for: %shufflevector142 = shufflevector <16 x i8> zeroinitializer, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ; CHECK: Cost Model: Found costs of 0 for: %shufflevector84 = shufflevector <16 x i8> zeroinitializer, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> + %shufflevector142 = shufflevector <16 x i8> zeroinitializer, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shufflevector84 = shufflevector <16 x i8> zeroinitializer, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> + ret void +} + +attributes #0 = { "target-features"="+sve,+neon" } diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll index 4c48430..402313d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll @@ -12,6 +12,15 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ret <8 x i8> %tmp0 } +define <8 x i8> @trn1.v8i8_flipped(<8 x i8> %v0, <8 x i8> %v1) { +; CHECK-LABEL: 'trn1.v8i8_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %tmp0 +; + %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> + ret <8 x i8> %tmp0 +} + define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: 'trn2.v8i8' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -21,6 +30,15 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ret <8 x i8> %tmp0 } +define <8 x i8> @trn2.v8i8_flipped(<8 x i8> %v0, <8 x i8> %v1) { +; CHECK-LABEL: 'trn2.v8i8_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %tmp0 +; + %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> + ret <8 x i8> %tmp0 +} + define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn1.v16i8' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> @@ -30,6 +48,15 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ret <16 x i8> %tmp0 } +define <16 x i8> @trn1.v16i8_flipped(<16 x i8> %v0, <16 x i8> %v1) { +; CHECK-LABEL: 'trn1.v16i8_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %tmp0 +; + %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14> + ret <16 x i8> %tmp0 +} + define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: 'trn2.v16i8' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> @@ -39,6 +66,15 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ret <16 x i8> %tmp0 } +define <16 x i8> @trn2.v16i8_flipped(<16 x i8> %v0, <16 x i8> %v1) { +; CHECK-LABEL: 'trn2.v16i8_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 17, i32 1, i32 19, i32 3, i32 21, i32 5, i32 23, i32 7, i32 25, i32 9, i32 27, i32 11, i32 29, i32 13, i32 31, i32 15> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %tmp0 +; + %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 17, i32 1, i32 19, i32 3, i32 21, i32 5, i32 23, i32 7, i32 25, i32 9, i32 27, i32 11, i32 29, i32 13, i32 31, i32 15> + ret <16 x i8> %tmp0 +} + define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn1.v4i16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -48,6 +84,15 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ret <4 x i16> %tmp0 } +define <4 x i16> @trn1.v4i16_flipped(<4 x i16> %v0, <4 x i16> %v1) { +; CHECK-LABEL: 'trn1.v4i16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %tmp0 +; + %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x i16> %tmp0 +} + define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: 'trn2.v4i16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -57,6 +102,15 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ret <4 x i16> %tmp0 } +define <4 x i16> @trn2.v4i16_flipped(<4 x i16> %v0, <4 x i16> %v1) { +; CHECK-LABEL: 'trn2.v4i16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %tmp0 +; + %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x i16> %tmp0 +} + define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn1.v8i16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -66,6 +120,15 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ret <8 x i16> %tmp0 } +define <8 x i16> @trn1.v8i16_flipped(<8 x i16> %v0, <8 x i16> %v1) { +; CHECK-LABEL: 'trn1.v8i16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %tmp0 +; + %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> + ret <8 x i16> %tmp0 +} + define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: 'trn2.v8i16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -75,6 +138,15 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ret <8 x i16> %tmp0 } +define <8 x i16> @trn2.v8i16_flipped(<8 x i16> %v0, <8 x i16> %v1) { +; CHECK-LABEL: 'trn2.v8i16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %tmp0 +; + %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> + ret <8 x i16> %tmp0 +} + define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: 'trn1.v2i32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2> @@ -84,6 +156,15 @@ define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ret <2 x i32> %tmp0 } +define <2 x i32> @trn1.v2i32_flipped(<2 x i32> %v0, <2 x i32> %v1) { +; CHECK-LABEL: 'trn1.v2i32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %tmp0 +; + %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 0> + ret <2 x i32> %tmp0 +} + define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: 'trn2.v2i32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3> @@ -93,6 +174,15 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ret <2 x i32> %tmp0 } +define <2 x i32> @trn2.v2i32_flipped(<2 x i32> %v0, <2 x i32> %v1) { +; CHECK-LABEL: 'trn2.v2i32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 3, i32 1> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %tmp0 +; + %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 3, i32 1> + ret <2 x i32> %tmp0 +} + define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn1.v4i32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -102,6 +192,15 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %tmp0 } +define <4 x i32> @trn1.v4i32_flipped(<4 x i32> %v0, <4 x i32> %v1) { +; CHECK-LABEL: 'trn1.v4i32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %tmp0 +; + %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x i32> %tmp0 +} + define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: 'trn2.v4i32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -111,6 +210,15 @@ define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ret <4 x i32> %tmp0 } +define <4 x i32> @trn2.v4i32_flipped(<4 x i32> %v0, <4 x i32> %v1) { +; CHECK-LABEL: 'trn2.v4i32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %tmp0 +; + %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x i32> %tmp0 +} + define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK-LABEL: 'trn1.v2i64' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2> @@ -120,6 +228,15 @@ define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ret <2 x i64> %tmp0 } +define <2 x i64> @trn1.v2i64_flipped(<2 x i64> %v0, <2 x i64> %v1) { +; CHECK-LABEL: 'trn1.v2i64_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 2, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %tmp0 +; + %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 2, i32 0> + ret <2 x i64> %tmp0 +} + define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK-LABEL: 'trn2.v2i64' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3> @@ -129,6 +246,15 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ret <2 x i64> %tmp0 } +define <2 x i64> @trn2.v2i64_flipped(<2 x i64> %v0, <2 x i64> %v1) { +; CHECK-LABEL: 'trn2.v2i64_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 3, i32 1> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %tmp0 +; + %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 3, i32 1> + ret <2 x i64> %tmp0 +} + define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK-LABEL: 'trn1.v2f32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2> @@ -138,6 +264,15 @@ define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) { ret <2 x float> %tmp0 } +define <2 x float> @trn1.v2f32_flipped(<2 x float> %v0, <2 x float> %v1) { +; CHECK-LABEL: 'trn1.v2f32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 2, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %tmp0 +; + %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 2, i32 0> + ret <2 x float> %tmp0 +} + define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK-LABEL: 'trn2.v2f32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3> @@ -147,6 +282,15 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ret <2 x float> %tmp0 } +define <2 x float> @trn2.v2f32_flipped(<2 x float> %v0, <2 x float> %v1) { +; CHECK-LABEL: 'trn2.v2f32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 3, i32 1> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %tmp0 +; + %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 3, i32 1> + ret <2 x float> %tmp0 +} + define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn1.v4f32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -156,6 +300,15 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ret <4 x float> %tmp0 } +define <4 x float> @trn1.v4f32_flipped(<4 x float> %v0, <4 x float> %v1) { +; CHECK-LABEL: 'trn1.v4f32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %tmp0 +; + %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x float> %tmp0 +} + define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: 'trn2.v4f32' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -165,6 +318,15 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ret <4 x float> %tmp0 } +define <4 x float> @trn2.v4f32_flipped(<4 x float> %v0, <4 x float> %v1) { +; CHECK-LABEL: 'trn2.v4f32_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %tmp0 +; + %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x float> %tmp0 +} + define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK-LABEL: 'trn1.v2f64' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2> @@ -174,6 +336,15 @@ define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) { ret <2 x double> %tmp0 } +define <2 x double> @trn1.v2f64_flipped(<2 x double> %v0, <2 x double> %v1) { +; CHECK-LABEL: 'trn1.v2f64_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 2, i32 0> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %tmp0 +; + %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 2, i32 0> + ret <2 x double> %tmp0 +} + define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK-LABEL: 'trn2.v2f64' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3> @@ -183,6 +354,15 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ret <2 x double> %tmp0 } +define <2 x double> @trn2.v2f64_flipped(<2 x double> %v0, <2 x double> %v1) { +; CHECK-LABEL: 'trn2.v2f64_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 3, i32 1> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %tmp0 +; + %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 3, i32 1> + ret <2 x double> %tmp0 +} + define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn1.v4f16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -192,6 +372,15 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ret <4 x half> %tmp0 } +define <4 x half> @trn1.v4f16_flipped(<4 x half> %v0, <4 x half> %v1) { +; CHECK-LABEL: 'trn1.v4f16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %tmp0 +; + %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x half> %tmp0 +} + define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: 'trn2.v4f16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -201,6 +390,15 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ret <4 x half> %tmp0 } +define <4 x half> @trn2.v4f16_flipped(<4 x half> %v0, <4 x half> %v1) { +; CHECK-LABEL: 'trn2.v4f16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x half> %tmp0 +; + %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x half> %tmp0 +} + define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn1.v8f16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -210,6 +408,15 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ret <8 x half> %tmp0 } +define <8 x half> @trn1.v8f16_flipped(<8 x half> %v0, <8 x half> %v1) { +; CHECK-LABEL: 'trn1.v8f16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %tmp0 +; + %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> + ret <8 x half> %tmp0 +} + define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: 'trn2.v8f16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -219,6 +426,15 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ret <8 x half> %tmp0 } +define <8 x half> @trn2.v8f16_flipped(<8 x half> %v0, <8 x half> %v1) { +; CHECK-LABEL: 'trn2.v8f16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %tmp0 +; + %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> + ret <8 x half> %tmp0 +} + define <4 x bfloat> @trn1.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) { ; CHECK-LABEL: 'trn1.v4bf16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -228,6 +444,15 @@ define <4 x bfloat> @trn1.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) { ret <4 x bfloat> %tmp0 } +define <4 x bfloat> @trn1.v4bf16_flipped(<4 x bfloat> %v0, <4 x bfloat> %v1) { +; CHECK-LABEL: 'trn1.v4bf16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %tmp0 +; + %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x bfloat> %tmp0 +} + define <4 x bfloat> @trn2.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) { ; CHECK-LABEL: 'trn2.v4bf16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -237,6 +462,15 @@ define <4 x bfloat> @trn2.v4bf16(<4 x bfloat> %v0, <4 x bfloat> %v1) { ret <4 x bfloat> %tmp0 } +define <4 x bfloat> @trn2.v4bf16_flipped(<4 x bfloat> %v0, <4 x bfloat> %v1) { +; CHECK-LABEL: 'trn2.v4bf16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %tmp0 +; + %tmp0 = shufflevector <4 x bfloat> %v0, <4 x bfloat> %v1, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x bfloat> %tmp0 +} + define <8 x bfloat> @trn1.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) { ; CHECK-LABEL: 'trn1.v8bf16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -246,6 +480,15 @@ define <8 x bfloat> @trn1.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) { ret <8 x bfloat> %tmp0 } +define <8 x bfloat> @trn1.v8bf16_flipped(<8 x bfloat> %v0, <8 x bfloat> %v1) { +; CHECK-LABEL: 'trn1.v8bf16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %tmp0 +; + %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6> + ret <8 x bfloat> %tmp0 +} + define <8 x bfloat> @trn2.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) { ; CHECK-LABEL: 'trn2.v8bf16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -254,3 +497,12 @@ define <8 x bfloat> @trn2.v8bf16(<8 x bfloat> %v0, <8 x bfloat> %v1) { %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ret <8 x bfloat> %tmp0 } + +define <8 x bfloat> @trn2.v8bf16_flipped(<8 x bfloat> %v0, <8 x bfloat> %v1) { +; CHECK-LABEL: 'trn2.v8bf16_flipped' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %tmp0 +; + %tmp0 = shufflevector <8 x bfloat> %v0, <8 x bfloat> %v1, <8 x i32> <i32 9, i32 1, i32 11, i32 3, i32 13, i32 5, i32 15, i32 7> + ret <8 x bfloat> %tmp0 +} diff --git a/llvm/test/Analysis/CostModel/AArch64/sincos.ll b/llvm/test/Analysis/CostModel/AArch64/sincos.ll index 32408acb..48537f6 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sincos.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sincos.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos" ; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s ; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -intrinsic-cost-strategy=intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB +; RUN: opt < %s -mtriple=arm64-apple-macos10.9 -mattr=+neon -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck -check-prefix=SINCOS_STRET %s define void @sincos() { ; CHECK-LABEL: 'sincos' @@ -8,13 +9,11 @@ define void @sincos() { ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison) ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison) ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) -; ; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) ; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) ; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) -; ; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison) ; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison) ; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison) @@ -26,19 +25,33 @@ define void @sincos() { ; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) -; ; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) -; ; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison) ; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison) ; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison) ; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison) ; +; SINCOS_STRET-LABEL: 'sincos' +; SINCOS_STRET: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison) +; SINCOS_STRET: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison) +; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison) +; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison) +; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison) +; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison) +; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison) +; %f16 = call { half, half } @llvm.sincos.f16(half poison) %f32 = call { float, float } @llvm.sincos.f32(float poison) %f64 = call { double, double } @llvm.sincos.f64(double poison) diff --git a/llvm/test/Analysis/CostModel/AArch64/splice.ll b/llvm/test/Analysis/CostModel/AArch64/splice.ll index 1d76a48..bb787d0 100644 --- a/llvm/test/Analysis/CostModel/AArch64/splice.ll +++ b/llvm/test/Analysis/CostModel/AArch64/splice.ll @@ -3,36 +3,38 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -define void @vector_splice() #0 { +define void @vector_splice(i32 %offset) #0 { ; CHECK-LABEL: 'vector_splice' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v16i8 = call <16 x i8> @llvm.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v32i8 = call <32 x i8> @llvm.vector.splice.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i16 = call <2 x i16> @llvm.vector.splice.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i16 = call <4 x i16> @llvm.vector.splice.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8i16 = call <8 x i16> @llvm.vector.splice.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16i16 = call <16 x i16> @llvm.vector.splice.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i32 = call <4 x i32> @llvm.vector.splice.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v8i32 = call <8 x i32> @llvm.vector.splice.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i64 = call <2 x i64> @llvm.vector.splice.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v4i64 = call <4 x i64> @llvm.vector.splice.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f16 = call <2 x half> @llvm.vector.splice.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4f16 = call <4 x half> @llvm.vector.splice.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8f16 = call <8 x half> @llvm.vector.splice.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16f16 = call <16 x half> @llvm.vector.splice.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f32 = call <2 x float> @llvm.vector.splice.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4f32 = call <4 x float> @llvm.vector.splice.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v8f32 = call <8 x float> @llvm.vector.splice.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2f64 = call <2 x double> @llvm.vector.splice.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v4f64 = call <4 x double> @llvm.vector.splice.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2bf16 = call <2 x bfloat> @llvm.vector.splice.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4bf16 = call <4 x bfloat> @llvm.vector.splice.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8bf16 = call <8 x bfloat> @llvm.vector.splice.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.v16bf16 = call <16 x bfloat> @llvm.vector.splice.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v16i1 = call <16 x i1> @llvm.vector.splice.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v8i1 = call <8 x i1> @llvm.vector.splice.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v4i1 = call <4 x i1> @llvm.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splice.v2i1 = call <2 x i1> @llvm.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %splice.v2i128 = call <2 x i128> @llvm.vector.splice.v2i128(<2 x i128> zeroinitializer, <2 x i128> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <16 x i8> @llvm.vector.splice.left.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <32 x i8> @llvm.vector.splice.left.v32i8(<32 x i8> zeroinitializer, <32 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <2 x i16> @llvm.vector.splice.left.v2i16(<2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <4 x i16> @llvm.vector.splice.left.v4i16(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <8 x i16> @llvm.vector.splice.left.v8i16(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <16 x i16> @llvm.vector.splice.left.v16i16(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <4 x i32> @llvm.vector.splice.left.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <8 x i32> @llvm.vector.splice.left.v8i32(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i64> @llvm.vector.splice.left.v2i64(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <4 x i64> @llvm.vector.splice.left.v4i64(<4 x i64> zeroinitializer, <4 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x half> @llvm.vector.splice.left.v2f16(<2 x half> zeroinitializer, <2 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x half> @llvm.vector.splice.left.v4f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x half> @llvm.vector.splice.left.v8f16(<8 x half> zeroinitializer, <8 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <16 x half> @llvm.vector.splice.left.v16f16(<16 x half> zeroinitializer, <16 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <2 x float> @llvm.vector.splice.left.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <4 x float> @llvm.vector.splice.left.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <8 x float> @llvm.vector.splice.left.v8f32(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <2 x double> @llvm.vector.splice.left.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <4 x double> @llvm.vector.splice.left.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x bfloat> @llvm.vector.splice.left.v2bf16(<2 x bfloat> zeroinitializer, <2 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x bfloat> @llvm.vector.splice.left.v4bf16(<4 x bfloat> zeroinitializer, <4 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x bfloat> @llvm.vector.splice.left.v8bf16(<8 x bfloat> zeroinitializer, <8 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <16 x bfloat> @llvm.vector.splice.left.v16bf16(<16 x bfloat> zeroinitializer, <16 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <16 x i1> @llvm.vector.splice.left.v16i1(<16 x i1> zeroinitializer, <16 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <8 x i1> @llvm.vector.splice.left.v8i1(<8 x i1> zeroinitializer, <8 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x i1> @llvm.vector.splice.left.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <2 x i1> @llvm.vector.splice.left.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %28 = call <2 x i128> @llvm.vector.splice.left.v2i128(<2 x i128> zeroinitializer, <2 x i128> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %left.variable = call <4 x i32> @llvm.vector.splice.left.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %right.variable = call <4 x i32> @llvm.vector.splice.right.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 %offset) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %splice.v16i8 = call <16 x i8> @llvm.vector.splice.v16i8(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer, i32 1) @@ -63,6 +65,9 @@ define void @vector_splice() #0 { %splice.v4i1 = call <4 x i1> @llvm.vector.splice.v4i1(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer, i32 1) %splice.v2i1 = call <2 x i1> @llvm.vector.splice.v2i1(<2 x i1> zeroinitializer, <2 x i1> zeroinitializer, i32 1) %splice.v2i128 = call <2 x i128> @llvm.vector.splice.v2i128(<2 x i128> zeroinitializer, <2 x i128> zeroinitializer, i32 1) + + %left.variable = call <4 x i32> @llvm.vector.splice.left(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 %offset) + %right.variable = call <4 x i32> @llvm.vector.splice.right(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 %offset) ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll index 1c40354..a735640 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll @@ -1,218 +1,364 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s +; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+sve | FileCheck %s --check-prefixes=CHECK,CHECK-BASE +; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+sve-b16b16,+sve | FileCheck %s --check-prefixes=CHECK,CHECK-BF16 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @fadd() { ; CHECK-LABEL: 'fadd' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fadd <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <vscale x 4 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <vscale x 8 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <vscale x 16 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fadd <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fadd <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fadd <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fadd <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fadd <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fadd <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = fadd <vscale x 4 x half> undef, undef - %V8F16 = fadd <vscale x 8 x half> undef, undef - %V16F16 = fadd <vscale x 16 x half> undef, undef + %V4F16 = fadd <vscale x 4 x half> poison, poison + %V8F16 = fadd <vscale x 8 x half> poison, poison + %V16F16 = fadd <vscale x 16 x half> poison, poison - %V1F32 = fadd <vscale x 1 x float> undef, undef - %V2F32 = fadd <vscale x 2 x float> undef, undef - %V4F32 = fadd <vscale x 4 x float> undef, undef - %V8F32 = fadd <vscale x 8 x float> undef, undef + %V1F32 = fadd <vscale x 1 x float> poison, poison + %V2F32 = fadd <vscale x 2 x float> poison, poison + %V4F32 = fadd <vscale x 4 x float> poison, poison + %V8F32 = fadd <vscale x 8 x float> poison, poison - %V2F64 = fadd <vscale x 2 x double> undef, undef - %V4F64 = fadd <vscale x 4 x double> undef, undef + %V2F64 = fadd <vscale x 2 x double> poison, poison + %V4F64 = fadd <vscale x 4 x double> poison, poison ret void } +define void @fadd_bf16() { +; CHECK-BASE-LABEL: 'fadd_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fadd_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison + + ret void +} + + define void @fsub() { ; CHECK-LABEL: 'fsub' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fsub <vscale x 1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <vscale x 4 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <vscale x 8 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <vscale x 16 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %V1F32 = fsub <vscale x 1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fsub <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fsub <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fsub <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fsub <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fsub <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = fsub <vscale x 4 x half> undef, undef - %V8F16 = fsub <vscale x 8 x half> undef, undef - %V16F16 = fsub <vscale x 16 x half> undef, undef + %V4F16 = fsub <vscale x 4 x half> poison, poison + %V8F16 = fsub <vscale x 8 x half> poison, poison + %V16F16 = fsub <vscale x 16 x half> poison, poison - %V1F32 = fsub <vscale x 1 x float> undef, undef - %V2F32 = fsub <vscale x 2 x float> undef, undef - %V4F32 = fsub <vscale x 4 x float> undef, undef - %V8F32 = fsub <vscale x 8 x float> undef, undef + %V1F32 = fsub <vscale x 1 x float> poison, poison + %V2F32 = fsub <vscale x 2 x float> poison, poison + %V4F32 = fsub <vscale x 4 x float> poison, poison + %V8F32 = fsub <vscale x 8 x float> poison, poison - %V2F64 = fsub <vscale x 2 x double> undef, undef - %V4F64 = fsub <vscale x 4 x double> undef, undef + %V2F64 = fsub <vscale x 2 x double> poison, poison + %V4F64 = fsub <vscale x 4 x double> poison, poison + + ret void +} + +define void @fsub_bf16() { +; CHECK-BASE-LABEL: 'fsub_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fsub_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison ret void } define void @fneg() { ; CHECK-LABEL: 'fneg' -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <vscale x 2 x half> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <vscale x 4 x half> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <vscale x 8 x half> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <vscale x 16 x half> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <vscale x 2 x float> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <vscale x 4 x float> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <vscale x 8 x float> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <vscale x 2 x double> undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <vscale x 4 x double> undef +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <vscale x 2 x half> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <vscale x 4 x half> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <vscale x 8 x half> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <vscale x 16 x half> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fneg <vscale x 2 x float> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fneg <vscale x 4 x float> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fneg <vscale x 8 x float> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fneg <vscale x 2 x double> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fneg <vscale x 4 x double> poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V2F16 = fneg <vscale x 2 x half> undef - %V4F16 = fneg <vscale x 4 x half> undef - %V8F16 = fneg <vscale x 8 x half> undef - %V16F16 = fneg <vscale x 16 x half> undef + %V2F16 = fneg <vscale x 2 x half> poison + %V4F16 = fneg <vscale x 4 x half> poison + %V8F16 = fneg <vscale x 8 x half> poison + %V16F16 = fneg <vscale x 16 x half> poison - %V2F32 = fneg <vscale x 2 x float> undef - %V4F32 = fneg <vscale x 4 x float> undef - %V8F32 = fneg <vscale x 8 x float> undef + %V2F32 = fneg <vscale x 2 x float> poison + %V4F32 = fneg <vscale x 4 x float> poison + %V8F32 = fneg <vscale x 8 x float> poison - %V2F64 = fneg <vscale x 2 x double> undef - %V4F64 = fneg <vscale x 4 x double> undef + %V2F64 = fneg <vscale x 2 x double> poison + %V4F64 = fneg <vscale x 4 x double> poison + + ret void +} + +define void @fneg_bf16() { +; CHECK-LABEL: 'fneg_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV2BF16 = fneg <vscale x 2 x bfloat> poison + %NXV4BF16 = fneg <vscale x 4 x bfloat> poison + %NXV8BF16 = fneg <vscale x 8 x bfloat> poison + %NXV16BF16 = fneg <vscale x 16 x bfloat> poison ret void } define void @fmul() { ; CHECK-LABEL: 'fmul' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <vscale x 4 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <vscale x 8 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <vscale x 16 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = fmul <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = fmul <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = fmul <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = fmul <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = fmul <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = fmul <vscale x 4 x half> undef, undef - %V8F16 = fmul <vscale x 8 x half> undef, undef - %V16F16 = fmul <vscale x 16 x half> undef, undef + %V4F16 = fmul <vscale x 4 x half> poison, poison + %V8F16 = fmul <vscale x 8 x half> poison, poison + %V16F16 = fmul <vscale x 16 x half> poison, poison - %V2F32 = fmul <vscale x 2 x float> undef, undef - %V4F32 = fmul <vscale x 4 x float> undef, undef - %V8F32 = fmul <vscale x 8 x float> undef, undef + %V2F32 = fmul <vscale x 2 x float> poison, poison + %V4F32 = fmul <vscale x 4 x float> poison, poison + %V8F32 = fmul <vscale x 8 x float> poison, poison - %V2F64 = fmul <vscale x 2 x double> undef, undef - %V4F64 = fmul <vscale x 4 x double> undef, undef + %V2F64 = fmul <vscale x 2 x double> poison, poison + %V4F64 = fmul <vscale x 4 x double> poison, poison + + ret void +} + +define void @fmul_bf16() { +; CHECK-BASE-LABEL: 'fmul_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fmul_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison ret void } define void @fdiv() { ; CHECK-LABEL: 'fdiv' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8F32 = fdiv <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4F64 = fdiv <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <vscale x 4 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <vscale x 8 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16F16 = fdiv <vscale x 16 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = fdiv <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = fdiv <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8F32 = fdiv <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = fdiv <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4F64 = fdiv <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = fdiv <vscale x 4 x half> undef, undef - %V8F16 = fdiv <vscale x 8 x half> undef, undef - %V16F16 = fdiv <vscale x 16 x half> undef, undef + %V4F16 = fdiv <vscale x 4 x half> poison, poison + %V8F16 = fdiv <vscale x 8 x half> poison, poison + %V16F16 = fdiv <vscale x 16 x half> poison, poison - %V2F32 = fdiv <vscale x 2 x float> undef, undef - %V4F32 = fdiv <vscale x 4 x float> undef, undef - %V8F32 = fdiv <vscale x 8 x float> undef, undef + %V2F32 = fdiv <vscale x 2 x float> poison, poison + %V4F32 = fdiv <vscale x 4 x float> poison, poison + %V8F32 = fdiv <vscale x 8 x float> poison, poison - %V2F64 = fdiv <vscale x 2 x double> undef, undef - %V4F64 = fdiv <vscale x 4 x double> undef, undef + %V2F64 = fdiv <vscale x 2 x double> poison, poison + %V4F64 = fdiv <vscale x 4 x double> poison, poison + + ret void +} + +define void @fdiv_bf16() { +; CHECK-LABEL: 'fdiv_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:29 CodeSize:4 Lat:4 SizeLat:4 for: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:58 CodeSize:4 Lat:4 SizeLat:4 for: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison ret void } define void @frem() { ; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <vscale x 4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <vscale x 16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = frem <vscale x 2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <vscale x 4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <vscale x 8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <vscale x 2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <vscale x 4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <vscale x 4 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <vscale x 8 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <vscale x 16 x half> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F32 = frem <vscale x 2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <vscale x 4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <vscale x 8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <vscale x 2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = frem <vscale x 4 x half> undef, undef - %V8F16 = frem <vscale x 8 x half> undef, undef - %V16F16 = frem <vscale x 16 x half> undef, undef + %V4F16 = frem <vscale x 4 x half> poison, poison + %V8F16 = frem <vscale x 8 x half> poison, poison + %V16F16 = frem <vscale x 16 x half> poison, poison - %V2F32 = frem <vscale x 2 x float> undef, undef - %V4F32 = frem <vscale x 4 x float> undef, undef - %V8F32 = frem <vscale x 8 x float> undef, undef + %V2F32 = frem <vscale x 2 x float> poison, poison + %V4F32 = frem <vscale x 4 x float> poison, poison + %V8F32 = frem <vscale x 8 x float> poison, poison - %V2F64 = frem <vscale x 2 x double> undef, undef - %V4F64 = frem <vscale x 4 x double> undef, undef + %V2F64 = frem <vscale x 2 x double> poison, poison + %V4F64 = frem <vscale x 4 x double> poison, poison + + ret void +} + +define void @frem_bf16() { +; CHECK-LABEL: 'frem_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison + %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison + %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison ret void } define void @fma() { ; CHECK-LABEL: 'fma' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) - %V8F16 = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) - %V16F16 = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) + %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) + %V8F16 = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) + %V16F16 = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) - %V2F32 = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) - %V4F32 = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) - %V8F32 = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) + %V2F32 = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) + %V4F32 = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) + %V8F32 = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) - %V2F64 = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) - %V4F64 = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) + %V2F64 = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) + %V4F64 = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) + + ret void +} + +define void @fma_bf16() { +; CHECK-BASE-LABEL: 'fma_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fma_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of 4 for: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) ret void } define void @fmuladd() { ; CHECK-LABEL: 'fmuladd' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef) - %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef) - %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef) + %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison) + %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.v8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison) + %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.v16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison) - %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef) - %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef) - %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef) + %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.v2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison) + %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.v4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison) + %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.v8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison) - %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef) - %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef) + %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.v2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison) + %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.v4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison) + + ret void +} + +define void @fmuladd_bf16() { +; CHECK-BASE-LABEL: 'fmuladd_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fmuladd_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of 4 for: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll index ecb4e14..7b482f3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll @@ -44,38 +44,38 @@ define void @ext() { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 15 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 15 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 12 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 12 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SVE128-NO-NEON-LABEL: 'ext' @@ -115,38 +115,38 @@ define void @ext() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; FIXED-MIN-256-LABEL: 'ext' @@ -198,26 +198,26 @@ define void @ext() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; FIXED-MIN-2048-LABEL: 'ext' @@ -386,22 +386,22 @@ define void @trunc() { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SVE128-NO-NEON-LABEL: 'trunc' @@ -600,14 +600,14 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r82 = fptrunc <4 x double> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r83 = fptrunc <8 x double> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r84 = fptrunc <16 x double> poison to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r85 = fpext float poison to double ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r87 = fpext <4 x float> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r88 = fpext <8 x float> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r89 = fpext <16 x float> poison to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8> @@ -616,158 +616,158 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r98 = fptoui <2 x float> poison to <2 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r99 = fptosi <2 x float> poison to <2 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r100 = fptoui <2 x double> poison to <2 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r101 = fptosi <2 x double> poison to <2 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r102 = fptoui <2 x double> poison to <2 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r103 = fptosi <2 x double> poison to <2 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r104 = fptoui <2 x double> poison to <2 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r105 = fptosi <2 x double> poison to <2 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r106 = fptoui <2 x double> poison to <2 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r107 = fptosi <2 x double> poison to <2 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r110 = fptoui <4 x float> poison to <4 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r111 = fptosi <4 x float> poison to <4 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r112 = fptoui <4 x float> poison to <4 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r113 = fptosi <4 x float> poison to <4 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r114 = fptoui <4 x float> poison to <4 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r115 = fptosi <4 x float> poison to <4 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r118 = fptoui <4 x float> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r119 = fptosi <4 x float> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r120 = fptoui <4 x double> poison to <4 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r121 = fptosi <4 x double> poison to <4 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r122 = fptoui <4 x double> poison to <4 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r123 = fptosi <4 x double> poison to <4 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r124 = fptoui <4 x double> poison to <4 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r125 = fptosi <4 x double> poison to <4 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r126 = fptoui <4 x double> poison to <4 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r127 = fptosi <4 x double> poison to <4 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r128 = fptoui <4 x double> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r129 = fptosi <4 x double> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r130 = fptoui <8 x float> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:41 CodeSize:25 Lat:41 SizeLat:41 for: %r131 = fptosi <8 x float> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r132 = fptoui <8 x float> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r133 = fptosi <8 x float> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r134 = fptoui <8 x float> poison to <8 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %r135 = fptosi <8 x float> poison to <8 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r136 = fptoui <8 x float> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r137 = fptosi <8 x float> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r138 = fptoui <8 x float> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r139 = fptosi <8 x float> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r140 = fptoui <8 x double> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r141 = fptosi <8 x double> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r142 = fptoui <8 x double> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r143 = fptosi <8 x double> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r144 = fptoui <8 x double> poison to <8 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r145 = fptosi <8 x double> poison to <8 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r146 = fptoui <8 x double> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r147 = fptosi <8 x double> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r148 = fptoui <8 x double> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r149 = fptosi <8 x double> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:83 CodeSize:51 Lat:83 SizeLat:83 for: %r150 = fptoui <16 x float> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:83 CodeSize:51 Lat:83 SizeLat:83 for: %r151 = fptosi <16 x float> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r152 = fptoui <16 x float> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 11 for: %r153 = fptosi <16 x float> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r154 = fptoui <16 x float> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r155 = fptosi <16 x float> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r156 = fptoui <16 x float> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r157 = fptosi <16 x float> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 20 for: %r158 = fptoui <16 x float> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 20 for: %r159 = fptosi <16 x float> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:87 CodeSize:55 Lat:87 SizeLat:87 for: %r160 = fptoui <16 x double> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:87 CodeSize:55 Lat:87 SizeLat:87 for: %r161 = fptosi <16 x double> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 23 for: %r162 = fptoui <16 x double> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 23 for: %r163 = fptosi <16 x double> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 22 for: %r164 = fptoui <16 x double> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 22 for: %r165 = fptosi <16 x double> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 20 for: %r166 = fptoui <16 x double> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 20 for: %r167 = fptosi <16 x double> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r168 = fptoui <16 x double> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r169 = fptosi <16 x double> poison to <16 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r172 = uitofp <2 x i8> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r173 = sitofp <2 x i8> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r174 = uitofp <2 x i16> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r175 = sitofp <2 x i16> poison to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r178 = uitofp <2 x i64> poison to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r179 = sitofp <2 x i64> poison to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r182 = uitofp <2 x i8> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r183 = sitofp <2 x i8> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r184 = uitofp <2 x i16> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r185 = sitofp <2 x i16> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r186 = uitofp <2 x i32> poison to <2 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r187 = sitofp <2 x i32> poison to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r192 = uitofp <4 x i8> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r193 = sitofp <4 x i8> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r194 = uitofp <4 x i16> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r195 = sitofp <4 x i16> poison to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:18 SizeLat:18 for: %r198 = uitofp <4 x i64> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:11 Lat:18 SizeLat:18 for: %r199 = sitofp <4 x i64> poison to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r200 = uitofp <4 x i1> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r201 = sitofp <4 x i1> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 9 for: %r202 = uitofp <4 x i8> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 9 for: %r203 = sitofp <4 x i8> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 9 for: %r204 = uitofp <4 x i16> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 9 for: %r205 = sitofp <4 x i16> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r206 = uitofp <4 x i32> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r207 = sitofp <4 x i32> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r208 = uitofp <4 x i64> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r209 = sitofp <4 x i64> poison to <4 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r210 = uitofp <8 x i1> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r211 = sitofp <8 x i1> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r212 = uitofp <8 x i8> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 10 for: %r213 = sitofp <8 x i8> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r214 = uitofp <8 x i16> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r215 = sitofp <8 x i16> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r216 = uitofp <8 x i32> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r217 = sitofp <8 x i32> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:36 CodeSize:22 Lat:36 SizeLat:36 for: %r218 = uitofp <8 x i64> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:36 CodeSize:22 Lat:36 SizeLat:36 for: %r219 = sitofp <8 x i64> poison to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r220 = uitofp <8 x i1> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r221 = sitofp <8 x i1> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 19 for: %r222 = uitofp <8 x i8> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 19 for: %r223 = sitofp <8 x i8> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 19 for: %r224 = uitofp <8 x i16> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 19 for: %r225 = sitofp <8 x i16> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r226 = uitofp <8 x i32> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r227 = sitofp <8 x i32> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r228 = uitofp <8 x i64> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r229 = sitofp <8 x i64> poison to <8 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r230 = uitofp <16 x i1> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r231 = sitofp <16 x i1> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 21 for: %r232 = uitofp <16 x i8> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 21 for: %r233 = sitofp <16 x i8> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r234 = uitofp <16 x i16> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r235 = sitofp <16 x i16> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r236 = uitofp <16 x i32> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r237 = sitofp <16 x i32> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:72 SizeLat:72 for: %r238 = uitofp <16 x i64> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:72 CodeSize:44 Lat:72 SizeLat:72 for: %r239 = sitofp <16 x i64> poison to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found costs of 15 for: %r240 = uitofp <16 x i1> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 15 for: %r241 = sitofp <16 x i1> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 39 for: %r242 = uitofp <16 x i8> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 39 for: %r243 = sitofp <16 x i8> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 38 for: %r244 = uitofp <16 x i16> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 38 for: %r245 = sitofp <16 x i16> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %r246 = uitofp <16 x i32> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %r247 = sitofp <16 x i32> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r248 = uitofp <16 x i64> poison to <16 x double> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r249 = sitofp <16 x i64> poison to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SVE128-NO-NEON-LABEL: 'casts_no_users' @@ -813,14 +813,14 @@ define i32 @casts_no_users() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r82 = fptrunc <4 x double> poison to <4 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r83 = fptrunc <8 x double> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r84 = fptrunc <16 x double> poison to <16 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r85 = fpext float poison to double ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r87 = fpext <4 x float> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r88 = fpext <8 x float> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r89 = fpext <16 x float> poison to <16 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8> @@ -849,62 +849,62 @@ define i32 @casts_no_users() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x float> poison to <4 x i16> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r118 = fptoui <4 x float> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r119 = fptosi <4 x float> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r120 = fptoui <4 x double> poison to <4 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r121 = fptosi <4 x double> poison to <4 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r122 = fptoui <4 x double> poison to <4 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r123 = fptosi <4 x double> poison to <4 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r124 = fptoui <4 x double> poison to <4 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r125 = fptosi <4 x double> poison to <4 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r126 = fptoui <4 x double> poison to <4 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r127 = fptosi <4 x double> poison to <4 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r128 = fptoui <4 x double> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r129 = fptosi <4 x double> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r130 = fptoui <8 x float> poison to <8 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r131 = fptosi <8 x float> poison to <8 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r132 = fptoui <8 x float> poison to <8 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r133 = fptosi <8 x float> poison to <8 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r134 = fptoui <8 x float> poison to <8 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r135 = fptosi <8 x float> poison to <8 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r136 = fptoui <8 x float> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r137 = fptosi <8 x float> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r138 = fptoui <8 x float> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r139 = fptosi <8 x float> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r140 = fptoui <8 x double> poison to <8 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r141 = fptosi <8 x double> poison to <8 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r142 = fptoui <8 x double> poison to <8 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r143 = fptosi <8 x double> poison to <8 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r144 = fptoui <8 x double> poison to <8 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r145 = fptosi <8 x double> poison to <8 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r146 = fptoui <8 x double> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r147 = fptosi <8 x double> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r148 = fptoui <8 x double> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r149 = fptosi <8 x double> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r150 = fptoui <16 x float> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r151 = fptosi <16 x float> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r152 = fptoui <16 x float> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r153 = fptosi <16 x float> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r154 = fptoui <16 x float> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r155 = fptosi <16 x float> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r156 = fptoui <16 x float> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r157 = fptosi <16 x float> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r158 = fptoui <16 x float> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r159 = fptosi <16 x float> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r160 = fptoui <16 x double> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r161 = fptosi <16 x double> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r162 = fptoui <16 x double> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r163 = fptosi <16 x double> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r164 = fptoui <16 x double> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r165 = fptosi <16 x double> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r166 = fptoui <16 x double> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r167 = fptosi <16 x double> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r168 = fptoui <16 x double> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r169 = fptosi <16 x double> poison to <16 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r172 = uitofp <2 x i8> poison to <2 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r173 = sitofp <2 x i8> poison to <2 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r174 = uitofp <2 x i16> poison to <2 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r175 = sitofp <2 x i16> poison to <2 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float> @@ -913,8 +913,8 @@ define i32 @casts_no_users() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r179 = sitofp <2 x i64> poison to <2 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r182 = uitofp <2 x i8> poison to <2 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r183 = sitofp <2 x i8> poison to <2 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r184 = uitofp <2 x i16> poison to <2 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r185 = sitofp <2 x i16> poison to <2 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r186 = uitofp <2 x i32> poison to <2 x double> @@ -923,64 +923,64 @@ define i32 @casts_no_users() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r192 = uitofp <4 x i8> poison to <4 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r193 = sitofp <4 x i8> poison to <4 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r194 = uitofp <4 x i16> poison to <4 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r195 = sitofp <4 x i16> poison to <4 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r198 = uitofp <4 x i64> poison to <4 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r199 = sitofp <4 x i64> poison to <4 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r200 = uitofp <4 x i1> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r201 = sitofp <4 x i1> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r202 = uitofp <4 x i8> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r203 = sitofp <4 x i8> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r204 = uitofp <4 x i16> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r205 = sitofp <4 x i16> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r206 = uitofp <4 x i32> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r207 = sitofp <4 x i32> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r208 = uitofp <4 x i64> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r209 = sitofp <4 x i64> poison to <4 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r210 = uitofp <8 x i1> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r211 = sitofp <8 x i1> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r212 = uitofp <8 x i8> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r213 = sitofp <8 x i8> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r214 = uitofp <8 x i16> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r215 = sitofp <8 x i16> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r216 = uitofp <8 x i32> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r217 = sitofp <8 x i32> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r218 = uitofp <8 x i64> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r219 = sitofp <8 x i64> poison to <8 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r220 = uitofp <8 x i1> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r221 = sitofp <8 x i1> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r222 = uitofp <8 x i8> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r223 = sitofp <8 x i8> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r224 = uitofp <8 x i16> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r225 = sitofp <8 x i16> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r226 = uitofp <8 x i32> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r227 = sitofp <8 x i32> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r228 = uitofp <8 x i64> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r229 = sitofp <8 x i64> poison to <8 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r230 = uitofp <16 x i1> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r231 = sitofp <16 x i1> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r232 = uitofp <16 x i8> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r233 = sitofp <16 x i8> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r234 = uitofp <16 x i16> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r235 = sitofp <16 x i16> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r236 = uitofp <16 x i32> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r237 = sitofp <16 x i32> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r238 = uitofp <16 x i64> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r239 = sitofp <16 x i64> poison to <16 x float> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r240 = uitofp <16 x i1> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r241 = sitofp <16 x i1> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 16 for: %r242 = uitofp <16 x i8> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 16 for: %r243 = sitofp <16 x i8> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r244 = uitofp <16 x i16> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r245 = sitofp <16 x i16> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r246 = uitofp <16 x i32> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r247 = sitofp <16 x i32> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r248 = uitofp <16 x i64> poison to <16 x double> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r249 = sitofp <16 x i64> poison to <16 x double> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; FIXED-MIN-256-LABEL: 'casts_no_users' @@ -1027,13 +1027,13 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r82 = fptrunc <4 x double> poison to <4 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r83 = fptrunc <8 x double> poison to <8 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r84 = fptrunc <16 x double> poison to <16 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r85 = fpext float poison to double ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r87 = fpext <4 x float> poison to <4 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r88 = fpext <8 x float> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r89 = fpext <16 x float> poison to <16 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8> @@ -1042,24 +1042,24 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r98 = fptoui <2 x float> poison to <2 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r99 = fptosi <2 x float> poison to <2 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r100 = fptoui <2 x double> poison to <2 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r101 = fptosi <2 x double> poison to <2 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r102 = fptoui <2 x double> poison to <2 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r103 = fptosi <2 x double> poison to <2 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r104 = fptoui <2 x double> poison to <2 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r105 = fptosi <2 x double> poison to <2 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r106 = fptoui <2 x double> poison to <2 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r107 = fptosi <2 x double> poison to <2 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r110 = fptoui <4 x float> poison to <4 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r111 = fptosi <4 x float> poison to <4 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r112 = fptoui <4 x float> poison to <4 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r113 = fptosi <4 x float> poison to <4 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r114 = fptoui <4 x float> poison to <4 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r115 = fptosi <4 x float> poison to <4 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> poison to <4 x i64> @@ -1082,72 +1082,72 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x float> poison to <8 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r136 = fptoui <8 x float> poison to <8 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r137 = fptosi <8 x float> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r138 = fptoui <8 x float> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r139 = fptosi <8 x float> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r140 = fptoui <8 x double> poison to <8 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r141 = fptosi <8 x double> poison to <8 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r142 = fptoui <8 x double> poison to <8 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r143 = fptosi <8 x double> poison to <8 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r144 = fptoui <8 x double> poison to <8 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r145 = fptosi <8 x double> poison to <8 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r146 = fptoui <8 x double> poison to <8 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r147 = fptosi <8 x double> poison to <8 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r148 = fptoui <8 x double> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r149 = fptosi <8 x double> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r150 = fptoui <16 x float> poison to <16 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r151 = fptosi <16 x float> poison to <16 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r152 = fptoui <16 x float> poison to <16 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r153 = fptosi <16 x float> poison to <16 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r154 = fptoui <16 x float> poison to <16 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r155 = fptosi <16 x float> poison to <16 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r156 = fptoui <16 x float> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r157 = fptosi <16 x float> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r158 = fptoui <16 x float> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r159 = fptosi <16 x float> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r160 = fptoui <16 x double> poison to <16 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r161 = fptosi <16 x double> poison to <16 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r162 = fptoui <16 x double> poison to <16 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r163 = fptosi <16 x double> poison to <16 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r164 = fptoui <16 x double> poison to <16 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r165 = fptosi <16 x double> poison to <16 x i16> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r166 = fptoui <16 x double> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r167 = fptosi <16 x double> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r168 = fptoui <16 x double> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r169 = fptosi <16 x double> poison to <16 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 3 for: %r172 = uitofp <2 x i8> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 3 for: %r173 = sitofp <2 x i8> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 3 for: %r174 = uitofp <2 x i16> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 3 for: %r175 = sitofp <2 x i16> poison to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r178 = uitofp <2 x i64> poison to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r179 = sitofp <2 x i64> poison to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r182 = uitofp <2 x i8> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r183 = sitofp <2 x i8> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r184 = uitofp <2 x i16> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r185 = sitofp <2 x i16> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r186 = uitofp <2 x i32> poison to <2 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r187 = sitofp <2 x i32> poison to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 3 for: %r192 = uitofp <4 x i8> poison to <4 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r193 = sitofp <4 x i8> poison to <4 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r194 = uitofp <4 x i16> poison to <4 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r195 = sitofp <4 x i16> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> poison to <4 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> poison to <4 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> poison to <4 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r202 = uitofp <4 x i8> poison to <4 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r203 = sitofp <4 x i8> poison to <4 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> poison to <4 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> poison to <4 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> poison to <4 x double> @@ -1156,44 +1156,44 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> poison to <4 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> poison to <8 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r212 = uitofp <8 x i8> poison to <8 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r213 = sitofp <8 x i8> poison to <8 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> poison to <8 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> poison to <8 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> poison to <8 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r217 = sitofp <8 x i32> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r218 = uitofp <8 x i64> poison to <8 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r219 = sitofp <8 x i64> poison to <8 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r220 = uitofp <8 x i1> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r221 = sitofp <8 x i1> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r222 = uitofp <8 x i8> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r223 = sitofp <8 x i8> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r224 = uitofp <8 x i16> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r225 = sitofp <8 x i16> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r226 = uitofp <8 x i32> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r227 = sitofp <8 x i32> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r228 = uitofp <8 x i64> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r229 = sitofp <8 x i64> poison to <8 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r230 = uitofp <16 x i1> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r231 = sitofp <16 x i1> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r232 = uitofp <16 x i8> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r233 = sitofp <16 x i8> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r234 = uitofp <16 x i16> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r235 = sitofp <16 x i16> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r236 = uitofp <16 x i32> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r237 = sitofp <16 x i32> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r238 = uitofp <16 x i64> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r239 = sitofp <16 x i64> poison to <16 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r240 = uitofp <16 x i1> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r241 = sitofp <16 x i1> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 8 for: %r242 = uitofp <16 x i8> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 8 for: %r243 = sitofp <16 x i8> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r244 = uitofp <16 x i16> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r245 = sitofp <16 x i16> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r246 = uitofp <16 x i32> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r247 = sitofp <16 x i32> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r248 = uitofp <16 x i64> poison to <16 x double> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r249 = sitofp <16 x i64> poison to <16 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; FIXED-MIN-2048-LABEL: 'casts_no_users' @@ -1255,24 +1255,24 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r98 = fptoui <2 x float> poison to <2 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r99 = fptosi <2 x float> poison to <2 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r100 = fptoui <2 x double> poison to <2 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r101 = fptosi <2 x double> poison to <2 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r102 = fptoui <2 x double> poison to <2 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r103 = fptosi <2 x double> poison to <2 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r104 = fptoui <2 x double> poison to <2 x i16> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r105 = fptosi <2 x double> poison to <2 x i16> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r106 = fptoui <2 x double> poison to <2 x i32> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r107 = fptosi <2 x double> poison to <2 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r110 = fptoui <4 x float> poison to <4 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %r111 = fptosi <4 x float> poison to <4 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r112 = fptoui <4 x float> poison to <4 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r113 = fptosi <4 x float> poison to <4 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r114 = fptoui <4 x float> poison to <4 x i16> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r115 = fptosi <4 x float> poison to <4 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> poison to <4 x i64> @@ -1329,38 +1329,38 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r169 = fptosi <16 x double> poison to <16 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 3 for: %r172 = uitofp <2 x i8> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 3 for: %r173 = sitofp <2 x i8> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 3 for: %r174 = uitofp <2 x i16> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 3 for: %r175 = sitofp <2 x i16> poison to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r178 = uitofp <2 x i64> poison to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:8 for: %r179 = sitofp <2 x i64> poison to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 4 for: %r182 = uitofp <2 x i8> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 4 for: %r183 = sitofp <2 x i8> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 4 for: %r184 = uitofp <2 x i16> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 4 for: %r185 = sitofp <2 x i16> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r186 = uitofp <2 x i32> poison to <2 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r187 = sitofp <2 x i32> poison to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 3 for: %r192 = uitofp <4 x i8> poison to <4 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 4 for: %r193 = sitofp <4 x i8> poison to <4 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r194 = uitofp <4 x i16> poison to <4 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r195 = sitofp <4 x i16> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> poison to <4 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> poison to <4 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> poison to <4 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r202 = uitofp <4 x i8> poison to <4 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r203 = sitofp <4 x i8> poison to <4 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> poison to <4 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> poison to <4 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> poison to <4 x double> @@ -1369,8 +1369,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> poison to <4 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> poison to <8 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> poison to <8 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r212 = uitofp <8 x i8> poison to <8 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r213 = sitofp <8 x i8> poison to <8 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> poison to <8 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> poison to <8 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> poison to <8 x float> @@ -1379,8 +1379,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r219 = sitofp <8 x i64> poison to <8 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r220 = uitofp <8 x i1> poison to <8 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r221 = sitofp <8 x i1> poison to <8 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r222 = uitofp <8 x i8> poison to <8 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r223 = sitofp <8 x i8> poison to <8 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r224 = uitofp <8 x i16> poison to <8 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r225 = sitofp <8 x i16> poison to <8 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r226 = uitofp <8 x i32> poison to <8 x double> @@ -1389,8 +1389,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r229 = sitofp <8 x i64> poison to <8 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r230 = uitofp <16 x i1> poison to <16 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r231 = sitofp <16 x i1> poison to <16 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r232 = uitofp <16 x i8> poison to <16 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r233 = sitofp <16 x i8> poison to <16 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r234 = uitofp <16 x i16> poison to <16 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r235 = sitofp <16 x i16> poison to <16 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r236 = uitofp <16 x i32> poison to <16 x float> @@ -1399,8 +1399,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r239 = sitofp <16 x i64> poison to <16 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r240 = uitofp <16 x i1> poison to <16 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r241 = sitofp <16 x i1> poison to <16 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r242 = uitofp <16 x i8> poison to <16 x double> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r243 = sitofp <16 x i8> poison to <16 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r244 = uitofp <16 x i16> poison to <16 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r245 = sitofp <16 x i16> poison to <16 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r246 = uitofp <16 x i32> poison to <16 x double> @@ -1819,12 +1819,12 @@ define i32 @load_extends() #0 { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v12 = sext <4 x i32> %loadv4i32 to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %v12 = sext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 0 for: %v14 = sext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 0 for: %v15 = zext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SVE128-NO-NEON-LABEL: 'load_extends' @@ -1868,8 +1868,8 @@ define i32 @load_extends() #0 { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 0 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 0 for: %v14 = sext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 0 for: %v15 = zext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; FIXED-MIN-256-LABEL: 'load_extends' @@ -1913,8 +1913,8 @@ define i32 @load_extends() #0 { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 0 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 0 for: %v14 = sext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 0 for: %v15 = zext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; FIXED-MIN-2048-LABEL: 'load_extends' @@ -1958,8 +1958,8 @@ define i32 @load_extends() #0 { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 0 for: %v13 = zext <4 x i32> %loadv4i32 to <4 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 0 for: %v14 = sext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 0 for: %v15 = zext <vscale x 2 x i32> %loadnxv2i32 to <vscale x 2 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %v16 = sext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %loadi8 = load i8, ptr undef @@ -2102,58 +2102,58 @@ define void @fp16cast() { ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r98 = fptoui <2 x half> poison to <2 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r99 = fptosi <2 x half> poison to <2 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r116 = fptoui <4 x half> poison to <4 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r117 = fptosi <4 x half> poison to <4 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r118 = fptoui <4 x half> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:13 Lat:21 SizeLat:21 for: %r119 = fptosi <4 x half> poison to <4 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r130 = fptoui <8 x half> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r131 = fptosi <8 x half> poison to <8 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r132 = fptoui <8 x half> poison to <8 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r136 = fptoui <8 x half> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r137 = fptosi <8 x half> poison to <8 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r138 = fptoui <8 x half> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:43 CodeSize:27 Lat:43 SizeLat:43 for: %r139 = fptosi <8 x half> poison to <8 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r150 = fptoui <16 x half> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:81 CodeSize:49 Lat:81 SizeLat:81 for: %r151 = fptosi <16 x half> poison to <16 x i1> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r152 = fptoui <16 x half> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r153 = fptosi <16 x half> poison to <16 x i8> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r154 = fptoui <16 x half> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r155 = fptosi <16 x half> poison to <16 x i16> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r156 = fptoui <16 x half> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %r157 = fptosi <16 x half> poison to <16 x i32> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r158 = fptoui <16 x half> poison to <16 x i64> +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:86 CodeSize:54 Lat:86 SizeLat:86 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r252 = uitofp <8 x i8> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r256 = uitofp <8 x i32> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r257 = sitofp <8 x i32> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r258 = uitofp <8 x i64> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 7 for: %r259 = sitofp <8 x i64> poison to <8 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r260 = uitofp <16 x i1> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 3 for: %r261 = sitofp <16 x i1> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r264 = uitofp <16 x i16> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %r265 = sitofp <16 x i16> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %r266 = uitofp <16 x i32> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %r267 = sitofp <16 x i32> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 14 for: %r268 = uitofp <16 x i64> poison to <16 x half> +; CHECK-SVE-NEXT: Cost Model: Found costs of 14 for: %r269 = sitofp <16 x i64> poison to <16 x half> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; SVE128-NO-NEON-LABEL: 'fp16cast' @@ -2185,48 +2185,48 @@ define void @fp16cast() { ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r116 = fptoui <4 x half> poison to <4 x i32> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r117 = fptosi <4 x half> poison to <4 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r118 = fptoui <4 x half> poison to <4 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r119 = fptosi <4 x half> poison to <4 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r130 = fptoui <8 x half> poison to <8 x i1> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r131 = fptosi <8 x half> poison to <8 x i1> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r132 = fptoui <8 x half> poison to <8 x i8> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r136 = fptoui <8 x half> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r137 = fptosi <8 x half> poison to <8 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r138 = fptoui <8 x half> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r139 = fptosi <8 x half> poison to <8 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r150 = fptoui <16 x half> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r151 = fptosi <16 x half> poison to <16 x i1> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r152 = fptoui <16 x half> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r153 = fptosi <16 x half> poison to <16 x i8> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r154 = fptoui <16 x half> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r155 = fptosi <16 x half> poison to <16 x i16> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r156 = fptoui <16 x half> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r157 = fptosi <16 x half> poison to <16 x i32> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r158 = fptoui <16 x half> poison to <16 x i64> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r252 = uitofp <8 x i8> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half> -; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r256 = uitofp <8 x i32> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r257 = sitofp <8 x i32> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r258 = uitofp <8 x i64> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r259 = sitofp <8 x i64> poison to <8 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r260 = uitofp <16 x i1> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r261 = sitofp <16 x i1> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r264 = uitofp <16 x i16> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 2 for: %r265 = sitofp <16 x i16> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r266 = uitofp <16 x i32> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 4 for: %r267 = sitofp <16 x i32> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r268 = uitofp <16 x i64> poison to <16 x half> +; SVE128-NO-NEON-NEXT: Cost Model: Found costs of 8 for: %r269 = sitofp <16 x i64> poison to <16 x half> ; SVE128-NO-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; FIXED-MIN-256-LABEL: 'fp16cast' @@ -2248,58 +2248,58 @@ define void @fp16cast() { ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r98 = fptoui <2 x half> poison to <2 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r99 = fptosi <2 x half> poison to <2 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r116 = fptoui <4 x half> poison to <4 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r117 = fptosi <4 x half> poison to <4 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> poison to <4 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> poison to <4 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r130 = fptoui <8 x half> poison to <8 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r131 = fptosi <8 x half> poison to <8 x i1> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r132 = fptoui <8 x half> poison to <8 x i8> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> poison to <8 x i32> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r137 = fptosi <8 x half> poison to <8 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r138 = fptoui <8 x half> poison to <8 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r139 = fptosi <8 x half> poison to <8 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r150 = fptoui <16 x half> poison to <16 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r151 = fptosi <16 x half> poison to <16 x i1> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r152 = fptoui <16 x half> poison to <16 x i8> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r153 = fptosi <16 x half> poison to <16 x i8> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r154 = fptoui <16 x half> poison to <16 x i16> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r155 = fptosi <16 x half> poison to <16 x i16> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r156 = fptoui <16 x half> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r157 = fptosi <16 x half> poison to <16 x i32> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r158 = fptoui <16 x half> poison to <16 x i64> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r252 = uitofp <8 x i8> poison to <8 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r257 = sitofp <8 x i32> poison to <8 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r258 = uitofp <8 x i64> poison to <8 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r259 = sitofp <8 x i64> poison to <8 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> poison to <16 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r263 = sitofp <16 x i8> poison to <16 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> poison to <16 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half> -; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r266 = uitofp <16 x i32> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 2 for: %r267 = sitofp <16 x i32> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r268 = uitofp <16 x i64> poison to <16 x half> +; FIXED-MIN-256-NEXT: Cost Model: Found costs of 4 for: %r269 = sitofp <16 x i64> poison to <16 x half> ; FIXED-MIN-256-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; FIXED-MIN-2048-LABEL: 'fp16cast' @@ -2321,22 +2321,22 @@ define void @fp16cast() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r98 = fptoui <2 x half> poison to <2 x i64> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %r99 = fptosi <2 x half> poison to <2 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r116 = fptoui <4 x half> poison to <4 x i32> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r117 = fptosi <4 x half> poison to <4 x i32> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> poison to <4 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> poison to <4 x i64> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r130 = fptoui <8 x half> poison to <8 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %r131 = fptosi <8 x half> poison to <8 x i1> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r132 = fptoui <8 x half> poison to <8 x i8> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r133 = fptosi <8 x half> poison to <8 x i8> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> poison to <8 x i32> @@ -2355,8 +2355,8 @@ define void @fp16cast() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r159 = fptosi <16 x half> poison to <16 x i64> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r252 = uitofp <8 x i8> poison to <8 x half> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r253 = sitofp <8 x i8> poison to <8 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> poison to <8 x half> @@ -2365,8 +2365,8 @@ define void @fp16cast() { ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r259 = sitofp <8 x i64> poison to <8 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> poison to <16 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> poison to <16 x half> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half> -; FIXED-MIN-2048-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r262 = uitofp <16 x i8> poison to <16 x half> +; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 2 for: %r263 = sitofp <16 x i8> poison to <16 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> poison to <16 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> poison to <16 x half> ; FIXED-MIN-2048-NEXT: Cost Model: Found costs of 1 for: %r266 = uitofp <16 x i32> poison to <16 x half> diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll index 5029ef9..6af9920 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll @@ -61,7 +61,7 @@ define void @cmp_legal_fp() #0 { ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %1 = fcmp oge <vscale x 2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %2 = fcmp oge <vscale x 4 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:1 for: %3 = fcmp oge <vscale x 8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %4 = fcmp oge <vscale x 8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fcmp oge <vscale x 2 x double> undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll index 91aaea2..d1a4486 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll @@ -5,26 +5,26 @@ target triple = "aarch64-unknown-linux-gnu" define void @sve_ext() { ; CHECK-LABEL: 'sve_ext' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> poison to <vscale x 16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> poison to <vscale x 16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> poison to <vscale x 16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i8_to_i64 = zext <vscale x 4 x i8> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i32 = zext <vscale x 8 x i8> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i64 = zext <vscale x 8 x i8> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> poison to <vscale x 16 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> poison to <vscale x 16 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> poison to <vscale x 16 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i8_to_i64 = sext <vscale x 4 x i8> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i32 = sext <vscale x 8 x i8> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i64 = sext <vscale x 8 x i8> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> poison to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> poison to <vscale x 16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> poison to <vscale x 16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zext_nxv4_i8_to_i64 = zext <vscale x 4 x i8> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zext_nxv8_i8_to_i32 = zext <vscale x 8 x i8> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %zext_nxv8_i8_to_i64 = zext <vscale x 8 x i8> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> poison to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> poison to <vscale x 16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> poison to <vscale x 16 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sext_nxv4_i8_to_i64 = sext <vscale x 4 x i8> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sext_nxv8_i8_to_i32 = sext <vscale x 8 x i8> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %sext_nxv8_i8_to_i64 = sext <vscale x 8 x i8> poison to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> poison to <vscale x 16 x i16> diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll index 140c53b..89d6a19 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll @@ -48,13 +48,13 @@ define void @fcmp_oeq_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_oeq_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_oeq_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp oeq <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp oeq <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp oeq <vscale x 2 x bfloat> undef, undef @@ -104,13 +104,13 @@ define void @fcmp_ogt_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ogt_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ogt <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ogt_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ogt <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ogt <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ogt <vscale x 2 x bfloat> undef, undef @@ -160,13 +160,13 @@ define void @fcmp_oge_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_oge_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_oge_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp oge <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp oge <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp oge <vscale x 2 x bfloat> undef, undef @@ -216,13 +216,13 @@ define void @fcmp_olt_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_olt_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_olt_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp olt <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp olt <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp olt <vscale x 2 x bfloat> undef, undef @@ -272,13 +272,13 @@ define void @fcmp_ole_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ole_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ole_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ole <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ole <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ole <vscale x 2 x bfloat> undef, undef @@ -328,13 +328,13 @@ define void @fcmp_one_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_one_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:5 for: %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:15 Lat:15 SizeLat:15 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_one_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v4bf16 = fcmp one <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 15 for: %v8bf16 = fcmp one <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp one <vscale x 2 x bfloat> undef, undef @@ -384,13 +384,13 @@ define void @fcmp_ord_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ord_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ord_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ord <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ord <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ord <vscale x 2 x bfloat> undef, undef @@ -440,13 +440,13 @@ define void @fcmp_ueq_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ueq_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:8 SizeLat:5 for: %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:15 Lat:15 SizeLat:15 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ueq_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:8 SizeLat:5 for: %v4bf16 = fcmp ueq <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:9 Lat:9 SizeLat:9 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 15 for: %v8bf16 = fcmp ueq <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ueq <vscale x 2 x bfloat> undef, undef @@ -496,13 +496,13 @@ define void @fcmp_ugt_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ugt_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ugt_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ugt <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ugt <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ugt <vscale x 2 x bfloat> undef, undef @@ -552,13 +552,13 @@ define void @fcmp_uge_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_uge_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_uge_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp uge <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp uge <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp uge <vscale x 2 x bfloat> undef, undef @@ -608,13 +608,13 @@ define void @fcmp_ult_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ult_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ult_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ult <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ult <vscale x 8 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ult <vscale x 2 x bfloat> undef, undef @@ -664,15 +664,15 @@ define void @fcmp_ule_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_ule_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_ule_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp ule <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp ule <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp ule <vscale x 16 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ule <vscale x 2 x bfloat> undef, undef @@ -723,15 +723,15 @@ define void @fcmp_une_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_une_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_une_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp une <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp une <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp une <vscale x 16 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp une <vscale x 2 x bfloat> undef, undef @@ -782,15 +782,15 @@ define void @fcmp_uno_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_uno_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_uno_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp uno <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp uno <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp uno <vscale x 16 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp uno <vscale x 2 x bfloat> undef, undef @@ -841,15 +841,15 @@ define void @fcmp_true_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_true_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_true_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp true <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp true <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp true <vscale x 16 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp true <vscale x 2 x bfloat> undef, undef @@ -900,15 +900,15 @@ define void @fcmp_false_bfloat(i32 %arg) { ; CHECK-COST2-LABEL: 'fcmp_false_bfloat' ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef -; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:11 Lat:11 SizeLat:11 for: %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef +; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef ; CHECK-COST2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-COST1-LABEL: 'fcmp_false_bfloat' ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:4 SizeLat:3 for: %v4bf16 = fcmp false <vscale x 4 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef -; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:5 SizeLat:7 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of 11 for: %v8bf16 = fcmp false <vscale x 8 x bfloat> undef, undef +; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:22 Lat:20 SizeLat:22 for: %v16bf16 = fcmp false <vscale x 16 x bfloat> undef, undef ; CHECK-COST1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp false <vscale x 2 x bfloat> undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll index df40a96..e128987 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll @@ -1,19 +1,8 @@ ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -D#VBITS=128 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024 -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048 ; VBITS represents the useful bit size of a vector register from the code diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll index 1e698b1..8a03837 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll @@ -8,13 +8,13 @@ define void @sve_fpext() { ; CHECK-LABEL: 'sve_fpext' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> poison to <vscale x 8 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> poison to <vscale x 2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> poison to <vscale x 8 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> poison to <vscale x 2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> poison to <vscale x 8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_to_f32 = fpext <vscale x 2 x half> poison to <vscale x 2 x float> @@ -36,10 +36,10 @@ define void @sve_fpext_bf16() { ; CHECK-LABEL: 'sve_fpext_bf16' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x float> diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll index ce624a1..ef2ee6e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll @@ -6,18 +6,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @sve-fptoi() { ; CHECK-LABEL: 'sve-fptoi' -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si8 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si32 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si64 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si8 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si16 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si64 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_si8 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_si32 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i32> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i32> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_si64 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_si8 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i8> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_si16 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i16> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_si64 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i64> +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv1f64_to_si8 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv1f64_to_ui8 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv1f64_to_si16 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i16> @@ -46,38 +46,38 @@ define void @sve-fptoi() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f16_to_ui8 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f16_to_si32 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f16_to_ui32 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_si64 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_ui64 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv4f16_to_si64 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv4f16_to_ui64 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f32_to_si8 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f32_to_si16 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4f32_to_ui16 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_si64 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si8 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si16 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui16 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si32 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv4f32_to_si64 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_si8 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_si16 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_ui16 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i16> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_si32 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i32> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv8f16_to_si8 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv8f16_to_ui8 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si32 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui32 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si64 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui64 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si8 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si16 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui16 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si64 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i64> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si8 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si16 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si32 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i32> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv8f16_to_si32 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv8f16_to_ui32 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %nv8f16_to_si64 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 10 for: %nv8f16_to_ui64 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8f32_to_si8 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8f32_to_si16 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8f32_to_ui16 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %nv8f32_to_si64 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8f64_to_si8 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8f64_to_si16 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8f64_to_si32 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nv1f16_to_si8 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i8> diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll index 5b30c33..1cf4e4a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll @@ -10,13 +10,13 @@ define void @sve_fptruncs() { ; CHECK-LABEL: 'sve_fptruncs' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x half> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x half> @@ -36,30 +36,30 @@ define void @sve_fptruncs() { define void @sve_fptruncs_bf16() { ; CHECK-SVE-LABEL: 'sve_fptruncs_bf16' -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat> -; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat> +; CHECK-SVE-NEXT: Cost Model: Found costs of 17 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> ; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat> ; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat> ; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat> ; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-SVE2-LABEL: 'sve_fptruncs_bf16' -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat> -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat> -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat> -; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 8 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 8 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 17 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 9 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 19 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat> +; CHECK-SVE2-NEXT: Cost Model: Found costs of 39 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat> ; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-BF16-LABEL: 'sve_fptruncs_bf16' ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> ; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat> -; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 3 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 2 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 5 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found costs of 11 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat> ; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat> diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index e007800..0836ef1 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-1 -; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-2 +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2p1 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2 +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme2 | FileCheck %s --check-prefixes=CHECK-VSCALE-2,CHECK-SVE2p1-OR-SME2 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -intrinsic-cost-strategy=type-based-intrinsic-cost -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=TYPE_BASED_ONLY target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -613,197 +615,203 @@ declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>) declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>) declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>) -define void @vector_splice() #0 { +define void @vector_splice(i32 %offset) #0 { ; CHECK-VSCALE-1-LABEL: 'vector_splice' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i64_neg = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2i64_neg = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f16_neg = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f32_neg = call <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f64_neg = call <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1bf16_neg = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %1 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %2 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %3 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %4 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %5 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %6 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %7 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %8 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %9 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %10 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %11 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %12 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %13 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %14 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %15 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %16 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %17 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %18 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %19 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %20 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %21 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %22 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %23 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %24 = call <vscale x 16 x i1> @llvm.vector.splice.left.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %25 = call <vscale x 8 x i1> @llvm.vector.splice.left.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %26 = call <vscale x 4 x i1> @llvm.vector.splice.left.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %27 = call <vscale x 2 x i1> @llvm.vector.splice.left.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %28 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %29 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %30 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %31 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %32 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %33 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %34 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %35 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %36 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %37 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %38 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %39 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %40 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %41 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %42 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %43 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %44 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %45 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %46 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %47 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %48 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %49 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %50 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %51 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %52 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %53 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %54 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %55 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %56 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %57 = call <vscale x 16 x i1> @llvm.vector.splice.right.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %58 = call <vscale x 8 x i1> @llvm.vector.splice.right.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %59 = call <vscale x 4 x i1> @llvm.vector.splice.right.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %60 = call <vscale x 2 x i1> @llvm.vector.splice.right.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %61 = call <vscale x 1 x i1> @llvm.vector.splice.right.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %left.variable = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %right.variable = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'vector_splice' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i64_neg = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2i64_neg = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f16_neg = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f32_neg = call <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f64_neg = call <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1bf16_neg = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %1 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %2 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %3 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %4 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %5 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %6 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %7 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %8 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %9 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %10 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %11 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %12 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %13 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %14 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %15 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %16 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %17 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %18 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %19 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %20 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %21 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %22 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %23 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %24 = call <vscale x 16 x i1> @llvm.vector.splice.left.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %25 = call <vscale x 8 x i1> @llvm.vector.splice.left.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %26 = call <vscale x 4 x i1> @llvm.vector.splice.left.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %27 = call <vscale x 2 x i1> @llvm.vector.splice.left.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %28 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %29 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %30 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %31 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %32 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %33 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %34 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %35 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %36 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %37 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %38 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %39 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %40 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %41 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %42 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %43 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %44 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %45 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %46 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %47 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %48 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %49 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %50 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %51 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %52 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %53 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %54 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %55 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %56 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %57 = call <vscale x 16 x i1> @llvm.vector.splice.right.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %58 = call <vscale x 8 x i1> @llvm.vector.splice.right.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %59 = call <vscale x 4 x i1> @llvm.vector.splice.right.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %60 = call <vscale x 2 x i1> @llvm.vector.splice.right.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %61 = call <vscale x 1 x i1> @llvm.vector.splice.right.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %left.variable = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %right.variable = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_splice' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i64_neg = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i64_neg = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f16_neg = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f32_neg = call <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1f64_neg = call <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1bf16_neg = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %9 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %10 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %11 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %12 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %13 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %14 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %15 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %16 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %17 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %18 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %19 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %20 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %21 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %22 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %23 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %24 = call <vscale x 16 x i1> @llvm.vector.splice.left.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %25 = call <vscale x 8 x i1> @llvm.vector.splice.left.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %26 = call <vscale x 4 x i1> @llvm.vector.splice.left.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %27 = call <vscale x 2 x i1> @llvm.vector.splice.left.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %28 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %29 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %30 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %31 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %32 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %33 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %34 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %35 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %36 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %37 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %38 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %39 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %40 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %41 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %42 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %43 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %44 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %45 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %46 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %47 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %48 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %49 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %50 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %51 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %52 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %53 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %54 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %55 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %56 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %57 = call <vscale x 16 x i1> @llvm.vector.splice.right.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %58 = call <vscale x 8 x i1> @llvm.vector.splice.right.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %59 = call <vscale x 4 x i1> @llvm.vector.splice.right.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %60 = call <vscale x 2 x i1> @llvm.vector.splice.right.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %61 = call <vscale x 1 x i1> @llvm.vector.splice.right.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %left.variable = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %right.variable = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; @@ -869,6 +877,9 @@ define void @vector_splice() #0 { %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1) %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1) %splice_nxv1i1_neg = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i32 -1) + + %left.variable = call <vscale x 4 x i32> @llvm.vector.splice.left(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) + %right.variable = call <vscale x 4 x i32> @llvm.vector.splice.right(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) ret void } @@ -920,7 +931,8 @@ define void @get_lane_mask() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) @@ -934,28 +946,53 @@ define void @get_lane_mask() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; -; CHECK-VSCALE-2-LABEL: 'get_lane_mask' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-SVE-LABEL: 'get_lane_mask' +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-SVE2p1-OR-SME2-LABEL: 'get_lane_mask' +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 5 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) +; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) @@ -966,7 +1003,8 @@ define void @get_lane_mask() #0 { ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 13 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 5 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) @@ -990,6 +1028,7 @@ define void @get_lane_mask() #0 { %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) + %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison) %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) @@ -1416,6 +1455,7 @@ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32) declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32) declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32) declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32) +declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64) declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64) declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16) declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-itofp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-itofp.ll index 8a85973..bb7f804 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-itofp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-itofp.ll @@ -6,102 +6,102 @@ target triple = "aarch64-unknown-linux-gnu" define void @sve-itofp() { ; CHECK-LABEL: 'sve-itofp' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2si8_to_f16 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2ui8_to_f16 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2si8_to_f16 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2ui8_to_f16 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si16_to_f16 = sitofp <vscale x 2 x i16> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui16_to_f16 = uitofp <vscale x 2 x i16> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si32_to_f16 = sitofp <vscale x 2 x i32> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui32_to_f16 = uitofp <vscale x 2 x i32> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si64_to_f16 = sitofp <vscale x 2 x i64> poison to <vscale x 2 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui64_to_f16 = uitofp <vscale x 2 x i64> poison to <vscale x 2 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2si8_to_f32 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2ui8_to_f32 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2si8_to_f32 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2ui8_to_f32 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si16_to_f32 = sitofp <vscale x 2 x i16> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui16_to_f32 = uitofp <vscale x 2 x i16> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si32_to_f32 = sitofp <vscale x 2 x i32> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui32_to_f32 = uitofp <vscale x 2 x i32> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si64_to_f32 = sitofp <vscale x 2 x i64> poison to <vscale x 2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui64_to_f32 = uitofp <vscale x 2 x i64> poison to <vscale x 2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2si8_to_f64 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv2ui8_to_f64 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2si8_to_f64 = sitofp <vscale x 2 x i8> poison to <vscale x 2 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv2ui8_to_f64 = uitofp <vscale x 2 x i8> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si16_to_f64 = sitofp <vscale x 2 x i16> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui16_to_f64 = uitofp <vscale x 2 x i16> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si32_to_f64 = sitofp <vscale x 2 x i32> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui32_to_f64 = uitofp <vscale x 2 x i32> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2si64_to_f64 = sitofp <vscale x 2 x i64> poison to <vscale x 2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv2ui64_to_f64 = uitofp <vscale x 2 x i64> poison to <vscale x 2 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si8_to_f16 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui8_to_f16 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4si8_to_f16 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4ui8_to_f16 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4si16_to_f16 = sitofp <vscale x 4 x i16> poison to <vscale x 4 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4ui16_to_f16 = uitofp <vscale x 4 x i16> poison to <vscale x 4 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4si32_to_f16 = sitofp <vscale x 4 x i32> poison to <vscale x 4 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4ui32_to_f16 = uitofp <vscale x 4 x i32> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si64_to_f16 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui64_to_f16 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si8_to_f32 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui8_to_f32 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4si64_to_f16 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4ui64_to_f16 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4si8_to_f32 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4ui8_to_f32 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4si16_to_f32 = sitofp <vscale x 4 x i16> poison to <vscale x 4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4ui16_to_f32 = uitofp <vscale x 4 x i16> poison to <vscale x 4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4si32_to_f32 = sitofp <vscale x 4 x i32> poison to <vscale x 4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv4ui32_to_f32 = uitofp <vscale x 4 x i32> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si64_to_f32 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui64_to_f32 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si8_to_f64 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui8_to_f64 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si16_to_f64 = sitofp <vscale x 4 x i16> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui16_to_f64 = uitofp <vscale x 4 x i16> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si32_to_f64 = sitofp <vscale x 4 x i32> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui32_to_f64 = uitofp <vscale x 4 x i32> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4si64_to_f64 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv4ui64_to_f64 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si8_to_f16 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui8_to_f16 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4si64_to_f32 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv4ui64_to_f32 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x float> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv4si8_to_f64 = sitofp <vscale x 4 x i8> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv4ui8_to_f64 = uitofp <vscale x 4 x i8> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv4si16_to_f64 = sitofp <vscale x 4 x i16> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv4ui16_to_f64 = uitofp <vscale x 4 x i16> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv4si32_to_f64 = sitofp <vscale x 4 x i32> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv4ui32_to_f64 = uitofp <vscale x 4 x i32> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4si64_to_f64 = sitofp <vscale x 4 x i64> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv4ui64_to_f64 = uitofp <vscale x 4 x i64> poison to <vscale x 4 x double> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv8si8_to_f16 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv8ui8_to_f16 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv8si16_to_f16 = sitofp <vscale x 8 x i16> poison to <vscale x 8 x half> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %nv8ui16_to_f16 = uitofp <vscale x 8 x i16> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si32_to_f16 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui32_to_f16 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si64_to_f16 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui64_to_f16 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si8_to_f32 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui8_to_f32 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si16_to_f32 = sitofp <vscale x 8 x i16> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui16_to_f32 = uitofp <vscale x 8 x i16> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si32_to_f32 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui32_to_f32 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si64_to_f32 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui64_to_f32 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si8_to_f64 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui8_to_f64 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si16_to_f64 = sitofp <vscale x 8 x i16> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui16_to_f64 = uitofp <vscale x 8 x i16> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si32_to_f64 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui32_to_f64 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8si64_to_f64 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8ui64_to_f64 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si8_to_f16 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui8_to_f16 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si16_to_f16 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui16_to_f16 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si32_to_f16 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui32_to_f16 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si64_to_f16 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui64_to_f16 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x half> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si8_to_f32 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui8_to_f32 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si16_to_f32 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui16_to_f32 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si32_to_f32 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui32_to_f32 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si64_to_f32 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui64_to_f32 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si8_to_f64 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui8_to_f64 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si16_to_f64 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui16_to_f64 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si32_to_f64 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui32_to_f64 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv16si64_to_f64 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x double> -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv16ui64_to_f64 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8si32_to_f16 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %nv8ui32_to_f16 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8si64_to_f16 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8ui64_to_f16 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x half> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8si8_to_f32 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %nv8ui8_to_f32 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8si16_to_f32 = sitofp <vscale x 8 x i16> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8ui16_to_f32 = uitofp <vscale x 8 x i16> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv8si32_to_f32 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv8ui32_to_f32 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8si64_to_f32 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv8ui64_to_f32 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x float> +; CHECK-NEXT: Cost Model: Found costs of 21 for: %nv8si8_to_f64 = sitofp <vscale x 8 x i8> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 21 for: %nv8ui8_to_f64 = uitofp <vscale x 8 x i8> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %nv8si16_to_f64 = sitofp <vscale x 8 x i16> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %nv8ui16_to_f64 = uitofp <vscale x 8 x i16> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv8si32_to_f64 = sitofp <vscale x 8 x i32> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv8ui32_to_f64 = uitofp <vscale x 8 x i32> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv8si64_to_f64 = sitofp <vscale x 8 x i64> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv8ui64_to_f64 = uitofp <vscale x 8 x i64> poison to <vscale x 8 x double> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv16si8_to_f16 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv16ui8_to_f16 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv16si16_to_f16 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %nv16ui16_to_f16 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv16si32_to_f16 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 6 for: %nv16ui32_to_f16 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %nv16si64_to_f16 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 14 for: %nv16ui64_to_f16 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x half> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %nv16si8_to_f32 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 20 for: %nv16ui8_to_f32 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv16si16_to_f32 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv16ui16_to_f32 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv16si32_to_f32 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 4 for: %nv16ui32_to_f32 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv16si64_to_f32 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 12 for: %nv16ui64_to_f32 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x float> +; CHECK-NEXT: Cost Model: Found costs of 43 for: %nv16si8_to_f64 = sitofp <vscale x 16 x i8> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 43 for: %nv16ui8_to_f64 = uitofp <vscale x 16 x i8> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 40 for: %nv16si16_to_f64 = sitofp <vscale x 16 x i16> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 40 for: %nv16ui16_to_f64 = uitofp <vscale x 16 x i16> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 24 for: %nv16si32_to_f64 = sitofp <vscale x 16 x i32> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 24 for: %nv16ui32_to_f64 = uitofp <vscale x 16 x i32> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %nv16si64_to_f64 = sitofp <vscale x 16 x i64> poison to <vscale x 16 x double> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %nv16ui64_to_f64 = uitofp <vscale x 16 x i64> poison to <vscale x 16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll b/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll index 6be5397..1df82b5 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll @@ -48,7 +48,7 @@ define void @scalable_stores() { define void @scalable_ext_loads() { ; CHECK-LABEL: 'scalable_ext_loads' ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv16i8 = load <vscale x 16 x i8>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv8i8 = load <vscale x 8 x i8>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv4i8 = load <vscale x 4 x i8>, ptr undef, align 4 @@ -56,13 +56,13 @@ define void @scalable_ext_loads() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv2i8 = load <vscale x 2 x i8>, ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv8i16 = load <vscale x 8 x i16>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv4i16 = load <vscale x 4 x i16>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv2i16 = load <vscale x 2 x i16>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv4i32 = load <vscale x 4 x i32>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv2i32 = load <vscale x 2 x i32>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv4i8.2 = load <vscale x 4 x i8>, ptr undef, align 4 @@ -72,9 +72,9 @@ define void @scalable_ext_loads() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv8i8.2 = load <vscale x 8 x i8>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %zext.nxv8i8to32 = zext <vscale x 8 x i8> %load.nxv8i8.2 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load.nxv8i8.3 = load <vscale x 8 x i8>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %zext.nxv8i8to64 = zext <vscale x 8 x i8> %load.nxv8i8.3 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv16i8 = load <vscale x 16 x i8>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv8i8 = load <vscale x 8 x i8>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv4i8 = load <vscale x 4 x i8>, ptr undef, align 4 @@ -82,13 +82,13 @@ define void @scalable_ext_loads() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv2i8 = load <vscale x 2 x i8>, ptr undef, align 2 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv8i16 = load <vscale x 8 x i16>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv4i16 = load <vscale x 4 x i16>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv2i16 = load <vscale x 2 x i16>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv4i32 = load <vscale x 4 x i32>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv2i32 = load <vscale x 2 x i32>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 0 for: %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv4i8.2 = load <vscale x 4 x i8>, ptr undef, align 4 @@ -98,7 +98,7 @@ define void @scalable_ext_loads() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv8i8.2 = load <vscale x 8 x i8>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %sext.nxv8i8to32 = sext <vscale x 8 x i8> %load2.nxv8i8.2 to <vscale x 8 x i32> ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load2.nxv8i8.3 = load <vscale x 8 x i8>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %sext.nxv8i8to64 = sext <vscale x 8 x i8> %load2.nxv8i8.3 to <vscale x 8 x i64> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll index f7d3719..f1f43b1 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll @@ -5,18 +5,18 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @sve_truncs() { ; CHECK-LABEL: 'sve_truncs' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> poison to <vscale x 2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> poison to <vscale x 4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> poison to <vscale x 4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> poison to <vscale x 8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> poison to <vscale x 8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i1> -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> poison to <vscale x 2 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> poison to <vscale x 4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> poison to <vscale x 4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> poison to <vscale x 8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 2 for: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> poison to <vscale x 8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 5 for: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i1> +; CHECK-NEXT: Cost Model: Found costs of 11 for: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i1> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i8> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i8> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i8> @@ -31,12 +31,12 @@ define void @sve_truncs() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 0 for: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> poison to <vscale x 8 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i8> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i16> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i16> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i16> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> poison to <vscale x 16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> poison to <vscale x 16 x i8> -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> poison to <vscale x 16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 3 for: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> poison to <vscale x 16 x i8> +; CHECK-NEXT: Cost Model: Found costs of 7 for: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> poison to <vscale x 16 x i8> ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> poison to <vscale x 2 x i1> diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll index dcbe239..23d241b 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ogt' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp ogt <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -179,7 +179,7 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ogt' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -206,7 +206,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_oge' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp oge <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -266,7 +266,7 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_oge' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -293,7 +293,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_olt' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp olt <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -353,7 +353,7 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_olt' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -380,7 +380,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ole' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp ole <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -440,7 +440,7 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ole' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -467,7 +467,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_oeq' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp oeq <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -527,7 +527,7 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_oeq' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -554,7 +554,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_one' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp one <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 11 for: %cmp.1 = fcmp one <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -614,7 +614,7 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_one' -; COST-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 11 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -641,7 +641,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_une' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp une <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -701,7 +701,7 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_une' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 7 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; @@ -728,7 +728,7 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ord' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp ord <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of 11 for: %cmp.1 = fcmp ord <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -788,7 +788,7 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ord' -; COST-NEXT: Cost Model: Found costs of RThru:11 CodeSize:9 Lat:9 SizeLat:9 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of 11 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b ; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index ab9d7f9..99a9bc2 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -739,5 +739,5 @@ define i32 @frem(i32 %arg) { ret i32 undef } -attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } -attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } +attributes #0 = { nounwind denormal_fpenv(float: ieee|ieee) } +attributes #1 = { nounwind denormal_fpenv(float: preservesign) } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index 3aa682c..78d43e8 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -13,153 +13,153 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 3> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shufflevector_i16' -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> ; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> ; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 3> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shufflevector_i16' @@ -167,153 +167,153 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 3> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shufflevector_i16' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 0, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 1, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 1> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 2, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> <i32 3, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 2> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 0, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 1, i32 3> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 1> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 2, i32 3> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> <i32 3, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer @@ -396,157 +396,157 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; Should not assert define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-LABEL: 'shufflevector_i8' -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2> ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 1> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2> ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 1> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i8' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 2> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 0, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 1, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 1> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 2, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> <i32 3, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 0, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 0, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 1, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> <i32 2, i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 2> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 0, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 1, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 1> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 2, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 0> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 1> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 2> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> <i32 3, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 0, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 0, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 0, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 0> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 1, i32 2, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 1, i32 2> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> <i32 2, i32 2, i32 1> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer @@ -861,28 +861,30 @@ define amdgpu_kernel void @shufflevector_i32(<2 x i32> %vec1, <2 x i32> %vec2) { ; Other shuffle cases define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %i8v4_2, <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i8> %i8v16, <16 x i8> %i8v16_2, <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i16> %i16v4, <4 x i16> %i16v4_2, <8 x i16> %i16v8, <8 x i16> %i16v8_2, <2 x i32> %i32v2, <2 x i32> %i32v2_2, <4 x i32> %i32v4, <4 x i32> %i32v4_2, <2 x float> %floatv2, <2 x float> %floatv2_2, <4 x float> %floatv4, <4 x float> %floatv4_2,<2 x i64> %i64v2, <2 x i64> %i64v2_2,<2 x double> %doublev2, <2 x double> %doublev2_2) { ; GFX9-10-LABEL: 'shuffle' -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 poison, i32 12, i32 poison, i32 5, i32 9, i32 13> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> <i32 1, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> <i32 1, i32 0> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> @@ -898,28 +900,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shuffle' -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 poison, i32 12, i32 poison, i32 5, i32 9, i32 13> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> <i32 1, i32 0> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> <i32 1, i32 0> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> @@ -935,28 +939,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shuffle' -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 poison, i32 12, i32 poison, i32 5, i32 9, i32 13> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> <i32 1, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> <i32 1, i32 0> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> @@ -972,28 +978,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shuffle' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 poison, i32 12, i32 poison, i32 5, i32 9, i32 13> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> <i32 1, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> <i32 1, i32 3, i32 2, i32 0> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> <i32 1, i32 0> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> <i32 1, i32 0> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> <i32 1, i32 3, i32 2, i32 0> @@ -1022,6 +1030,8 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> + %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13> + %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> <i32 0, i32 4, i32 poison, i32 12, i32 poison, i32 5, i32 9, i32 13> %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1> %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> <i32 1, i32 0> @@ -1047,7 +1057,7 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i8> %i8v8, <8 x i8> %i8v8_2, <2 x half> %halfv2, <2 x half> %halfv2_2, <4 x half> %halfv4, <4 x half> %halfv4_2, <8 x half> %halfv8, <8 x half> %halfv8_2, <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i16> %i16v4, <4 x i16> %i16v4_2, <8 x i16> %i16v8, <8 x i16> %i16v8_2, <2 x i32> %i32v2, <2 x i32> %i32v2_2, <4 x i32> %i32v4, <4 x i32> %i32v4_2, <2 x float> %floatv2, <2 x float> %floatv2_2, <4 x float> %floatv4, <4 x float> %floatv4_2,<2 x i64> %i64v2, <2 x i64> %i64v2_2,<2 x double> %doublev2, <2 x double> %doublev2_2) { ; ALL-LABEL: 'concat' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1062,7 +1072,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> %floatv2, <2 x float> %floatv2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> %floatv4, <4 x float> %floatv4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> %doublev2, <2 x double> %doublev2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1080,7 +1090,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'concat' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1095,7 +1105,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> %floatv2, <2 x float> %floatv2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> %floatv4, <4 x float> %floatv4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> %doublev2, <2 x double> %doublev2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1144,3 +1154,53 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % %v4f64_2 = shufflevector <2 x double> %doublev2, <2 x double> %doublev2_2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret void } + +define void @insert(<16 x i8> %i8v16, <16 x i8> %i8v16_2) { +; ALL-LABEL: 'insert' +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test3 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test5 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'insert' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test3 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test5 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %test3 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %test5 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> + %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21> + %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret void +} + +define void @splice(<16 x i8> %i8v16, <16 x i8> %i8v16_2) { +; ALL-LABEL: 'splice' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'splice' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27> + %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25> + ret void +} diff --git a/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll index 1e9a5f7..c0410cf 100644 --- a/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/add-cast-vect.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s + ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -15,13 +17,18 @@ target triple = "armv7--linux-gnueabihf" %T464 = type <4 x i64> define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'direct' +; COST-LABEL: 'direct' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 %v1 = load %T432, ptr %loadaddr2 ; ASM: vld1.64 - %r3 = add %T432 %v0, %v1 -; COST: cost of 1 for instruction: {{.*}} add <4 x i32> + %r3 = add %T432 %v0, %v1 ; ASM: vadd.i32 store %T432 %r3, ptr %storeaddr ; ASM: vst1.64 @@ -29,16 +36,22 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { } define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'ups1632' +; COST-LABEL: 'ups1632' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T416, ptr %loadaddr ; ASM: vldr %v1 = load %T416, ptr %loadaddr2 ; ASM: vldr %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = add %T432 %r1, %r2 -; COST: cost of 1 for instruction: {{.*}} add <4 x i32> + %r3 = add %T432 %r1, %r2 ; ASM: vaddl.s16 store %T432 %r3, ptr %storeaddr ; ASM: vst1.64 @@ -46,16 +59,22 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { } define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'upu1632' +; COST-LABEL: 'upu1632' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T416, ptr %loadaddr ; ASM: vldr %v1 = load %T416, ptr %loadaddr2 ; ASM: vldr %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = add %T432 %r1, %r2 -; COST: cost of 1 for instruction: {{.*}} add <4 x i32> + %r3 = add %T432 %r1, %r2 ; ASM: vaddl.u16 store %T432 %r3, ptr %storeaddr ; ASM: vst1.64 @@ -63,51 +82,66 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { } define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'ups3264' +; COST-LABEL: 'ups3264' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T232, ptr %loadaddr ; ASM: vldr %v1 = load %T232, ptr %loadaddr2 ; ASM: vldr - %r3 = add %T232 %v0, %v1 + %r3 = add %T232 %v0, %v1 ; ASM: vadd.i32 -; COST: cost of 1 for instruction: {{.*}} add <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 -; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, ptr %storeaddr ; ASM: vst1.64 ret void } define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'upu3264' +; COST-LABEL: 'upu3264' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T232, ptr %loadaddr ; ASM: vldr %v1 = load %T232, ptr %loadaddr2 ; ASM: vldr - %r3 = add %T232 %v0, %v1 + %r3 = add %T232 %v0, %v1 ; ASM: vadd.i32 -; COST: cost of 1 for instruction: {{.*}} add <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 -; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, ptr %storeaddr ; ASM: vst1.64 ret void } define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { -; COST: function 'dn3216' +; COST-LABEL: 'dn3216' +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = add <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 %v1 = load %T432, ptr %loadaddr2 ; ASM: vld1.64 - %r3 = add %T432 %v0, %v1 + %r3 = add %T432 %v0, %v1 ; ASM: vadd.i32 -; COST: cost of 1 for instruction: {{.*}} add <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 -; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> store %T416 %st, ptr %storeaddr ; ASM: vstr ret void diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index adb1a72..a953ba8 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -431,57 +431,57 @@ declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x define i32 @smul(i32 %arg) { ; V8M-LABEL: 'smul' -; V8M-NEXT: Cost Model: Found costs of 15 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:82 CodeSize:30 Lat:30 SizeLat:30 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:164 CodeSize:56 Lat:56 SizeLat:56 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:328 CodeSize:108 Lat:108 SizeLat:108 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of 8 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:68 CodeSize:30 Lat:30 SizeLat:30 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:136 CodeSize:58 Lat:58 SizeLat:58 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:272 CodeSize:114 Lat:114 SizeLat:114 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:56 CodeSize:42 Lat:42 SizeLat:42 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:112 CodeSize:82 Lat:82 SizeLat:82 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:224 CodeSize:162 Lat:162 SizeLat:162 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:112 CodeSize:82 Lat:82 SizeLat:82 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:224 CodeSize:162 Lat:162 SizeLat:162 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:448 CodeSize:322 Lat:322 SizeLat:322 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef ; ; NEON-LABEL: 'smul' -; NEON-NEXT: Cost Model: Found costs of RThru:15 CodeSize:8 Lat:8 SizeLat:8 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:83 CodeSize:12 Lat:12 SizeLat:12 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:162 CodeSize:13 Lat:13 SizeLat:13 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:320 CodeSize:15 Lat:15 SizeLat:15 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:10 Lat:10 SizeLat:10 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:38 CodeSize:11 Lat:11 SizeLat:11 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:72 CodeSize:13 Lat:13 SizeLat:13 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:23 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:46 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:92 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:23 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:46 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:92 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; MVE-LABEL: 'smul' -; MVE-NEXT: Cost Model: Found costs of 15 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) +; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:508 CodeSize:74 Lat:108 SizeLat:108 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:1016 CodeSize:144 Lat:212 SizeLat:212 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:2032 CodeSize:284 Lat:420 SizeLat:420 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found costs of 8 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:284 CodeSize:150 Lat:152 SizeLat:152 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:872 CodeSize:328 Lat:332 SizeLat:332 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:2832 CodeSize:652 Lat:660 SizeLat:660 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found costs of 6 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:40 Lat:46 SizeLat:46 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:232 CodeSize:142 Lat:154 SizeLat:154 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:624 CodeSize:282 Lat:306 SizeLat:306 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found costs of 6 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:96 CodeSize:72 Lat:78 SizeLat:78 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:360 CodeSize:270 Lat:282 SizeLat:282 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:880 CodeSize:538 Lat:562 SizeLat:562 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) @@ -532,57 +532,57 @@ declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x define i32 @umul(i32 %arg) { ; V8M-LABEL: 'umul' -; V8M-NEXT: Cost Model: Found costs of 13 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:78 CodeSize:26 Lat:26 SizeLat:26 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:156 CodeSize:48 Lat:48 SizeLat:48 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:312 CodeSize:92 Lat:92 SizeLat:92 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-NEXT: Cost Model: Found costs of 7 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:64 CodeSize:26 Lat:26 SizeLat:26 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:128 CodeSize:50 Lat:50 SizeLat:50 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:256 CodeSize:98 Lat:98 SizeLat:98 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:48 CodeSize:34 Lat:34 SizeLat:34 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:192 CodeSize:130 Lat:130 SizeLat:130 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; V8M-NEXT: Cost Model: Found costs of RThru:96 CodeSize:66 Lat:66 SizeLat:66 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:192 CodeSize:130 Lat:130 SizeLat:130 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-NEXT: Cost Model: Found costs of RThru:384 CodeSize:258 Lat:258 SizeLat:258 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; V8M-NEXT: Cost Model: Found costs of 8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; V8M-NEXT: Cost Model: Found costs of 8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; V8M-NEXT: Cost Model: Found costs of 16 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; V8M-NEXT: Cost Model: Found costs of 32 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 64 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-NEXT: Cost Model: Found costs of 128 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef ; ; NEON-LABEL: 'umul' -; NEON-NEXT: Cost Model: Found costs of RThru:13 CodeSize:7 Lat:7 SizeLat:7 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; NEON-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:77 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:154 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:308 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:19 CodeSize:9 Lat:9 SizeLat:9 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:34 CodeSize:10 Lat:10 SizeLat:10 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:12 Lat:12 SizeLat:12 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:42 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:84 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; NEON-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; NEON-NEXT: Cost Model: Found costs of RThru:21 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:42 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:84 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; MVE-LABEL: 'umul' -; MVE-NEXT: Cost Model: Found costs of 13 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) +; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:472 CodeSize:38 Lat:72 SizeLat:72 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:944 CodeSize:72 Lat:140 SizeLat:140 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:1888 CodeSize:140 Lat:276 SizeLat:276 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-NEXT: Cost Model: Found costs of 7 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:186 CodeSize:149 Lat:150 SizeLat:150 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:484 CodeSize:326 Lat:328 SizeLat:328 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:1288 CodeSize:648 Lat:652 SizeLat:652 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:62 CodeSize:39 Lat:44 SizeLat:44 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:228 CodeSize:140 Lat:150 SizeLat:150 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:616 CodeSize:278 Lat:298 SizeLat:298 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; MVE-NEXT: Cost Model: Found costs of RThru:94 CodeSize:71 Lat:76 SizeLat:76 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:356 CodeSize:268 Lat:278 SizeLat:278 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-NEXT: Cost Model: Found costs of RThru:872 CodeSize:534 Lat:554 SizeLat:554 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll index c2248c2..e5bbac6 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-NEON -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index 76f80da..461ad9d 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -278,9 +278,9 @@ define void @i64() { define void @f16() { ; CHECK-NEON-LABEL: 'f16' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem half undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f16' @@ -306,9 +306,9 @@ define void @f16() { ; ; CHECK-V8R-LABEL: 'f16' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem half undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem half undef, 0xH4000 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv half undef, undef @@ -321,9 +321,9 @@ define void @f16() { define void @f32() { ; CHECK-NEON-LABEL: 'f32' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem float undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f32' @@ -349,9 +349,9 @@ define void @f32() { ; ; CHECK-V8R-LABEL: 'f32' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem float undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem float undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv float undef, undef @@ -364,9 +364,9 @@ define void @f32() { define void @f64() { ; CHECK-NEON-LABEL: 'f64' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %2 = frem double undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00 -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'f64' @@ -392,9 +392,9 @@ define void @f64() { ; ; CHECK-V8R-LABEL: 'f64' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %2 = frem double undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00 -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %4 = frem double undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv double undef, undef @@ -867,11 +867,11 @@ define void @vi64() { define void @vf16() { ; CHECK-NEON-LABEL: 'vf16' ; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf16' @@ -903,11 +903,11 @@ define void @vf16() { ; ; CHECK-V8R-LABEL: 'vf16' ; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x half> undef, undef @@ -922,11 +922,11 @@ define void @vf16() { define void @vf32() { ; CHECK-NEON-LABEL: 'vf32' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf32' @@ -958,11 +958,11 @@ define void @vf32() { ; ; CHECK-V8R-LABEL: 'vf32' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x float> undef, undef @@ -977,11 +977,11 @@ define void @vf32() { define void @vf64() { ; CHECK-NEON-LABEL: 'vf64' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf64' @@ -1013,11 +1013,11 @@ define void @vf64() { ; ; CHECK-V8R-LABEL: 'vf64' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x double> undef, undef @@ -1492,11 +1492,11 @@ define void @vi64_2() { define void @vf16_2() { ; CHECK-NEON-LABEL: 'vf16_2' ; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf16_2' @@ -1528,11 +1528,11 @@ define void @vf16_2() { ; ; CHECK-V8R-LABEL: 'vf16_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x half> undef, <half 2., half 2.> @@ -1547,11 +1547,11 @@ define void @vf16_2() { define void @vf32_2() { ; CHECK-NEON-LABEL: 'vf32_2' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf32_2' @@ -1583,11 +1583,11 @@ define void @vf32_2() { ; ; CHECK-V8R-LABEL: 'vf32_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:64 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x float> undef, <float 2., float 2.> @@ -1602,11 +1602,11 @@ define void @vf32_2() { define void @vf64_2() { ; CHECK-NEON-LABEL: 'vf64_2' ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) -; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVE-LABEL: 'vf64_2' @@ -1638,11 +1638,11 @@ define void @vf64_2() { ; ; CHECK-V8R-LABEL: 'vf64_2' ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) -; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %1 = fdiv <2 x double> undef, <double 2., double 2.> diff --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll index f9424e8..6f2626c 100644 --- a/llvm/test/Analysis/CostModel/ARM/fparith.ll +++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @f32() { ; CHECK-MVE-LABEL: 'f32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c = fadd float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %d = fsub float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %e = fmul float undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'f32' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c = fadd float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %d = fsub float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %e = fmul float undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = fadd float undef, undef %d = fsub float undef, undef @@ -25,16 +25,16 @@ define void @f32() { define void @f16() { ; CHECK-MVE-LABEL: 'f16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c = fadd half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %d = fsub half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %e = fmul half undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'f16' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c = fadd half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %d = fsub half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %e = fmul half undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = fadd half undef, undef %d = fsub half undef, undef @@ -44,16 +44,16 @@ define void @f16() { define void @f64() { ; CHECK-MVE-LABEL: 'f64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c = fadd double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %d = fsub double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %e = fmul double undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'f64' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c = fadd double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %d = fsub double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %e = fmul double undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c = fadd double undef, undef %d = fsub double undef, undef @@ -63,28 +63,28 @@ define void @f64() { define void @vf32() { ; CHECK-MVE-LABEL: 'vf32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %c2 = fadd <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %d2 = fsub <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %e2 = fmul <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %c4 = fadd <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %d4 = fsub <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %e4 = fmul <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %c8 = fadd <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %d8 = fsub <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %e8 = fmul <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'vf32' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = fsub <2 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = fmul <2 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = fadd <4 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = fsub <4 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = fmul <4 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = fadd <8 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = fsub <8 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = fmul <8 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c2 = fadd <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d2 = fsub <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e2 = fmul <2 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c4 = fadd <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d4 = fsub <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e4 = fmul <4 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %c8 = fadd <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %d8 = fsub <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %e8 = fmul <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = fadd <2 x float> undef, undef %d2 = fsub <2 x float> undef, undef @@ -100,28 +100,28 @@ define void @vf32() { define void @vf16() { ; CHECK-MVE-LABEL: 'vf16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %c2 = fadd <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %d2 = fsub <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %e2 = fmul <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %c4 = fadd <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %d4 = fsub <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %e4 = fmul <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %c8 = fadd <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %d8 = fsub <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %e8 = fmul <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'vf16' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d2 = fsub <2 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e2 = fmul <2 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c4 = fadd <4 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d4 = fsub <4 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e4 = fmul <4 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = fadd <8 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = fsub <8 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = fmul <8 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c2 = fadd <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d2 = fsub <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e2 = fmul <2 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c4 = fadd <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d4 = fsub <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e4 = fmul <4 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c8 = fadd <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %d8 = fsub <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e8 = fmul <8 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = fadd <2 x half> undef, undef %d2 = fsub <2 x half> undef, undef @@ -137,28 +137,28 @@ define void @vf16() { define void @vf64() { ; CHECK-MVE-LABEL: 'vf64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %c2 = fadd <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %d2 = fsub <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 4 for: %e2 = fmul <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %c4 = fadd <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %d4 = fsub <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 8 for: %e4 = fmul <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %c8 = fadd <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %d8 = fsub <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of 16 for: %e8 = fmul <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'vf64' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 4 for: %c2 = fadd <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 4 for: %d2 = fsub <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 4 for: %e2 = fmul <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 8 for: %c4 = fadd <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 8 for: %d4 = fsub <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 8 for: %e4 = fmul <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 16 for: %c8 = fadd <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 16 for: %d8 = fsub <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 16 for: %e8 = fmul <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c2 = fadd <2 x double> undef, undef %d2 = fsub <2 x double> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll index aff7b19..af548f6 100644 --- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll @@ -1,213 +1,213 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 < %s | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp,+fp64 < %s | FileCheck %s --check-prefix=CHECK-MVEFP define void @casts() { ; CHECK-MVE-LABEL: 'casts' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 314 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 626 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1360 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 922 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 844 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 844 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 794 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 844 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1352 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1296 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1834 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1676 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1676 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1672 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1568 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4912 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4800 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 3018 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2762 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2860 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2762 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2860 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2760 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2856 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2752 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4880 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4768 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:7 Lat:23 SizeLat:23 for: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:7 Lat:24 SizeLat:24 for: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:7 Lat:23 SizeLat:23 for: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:7 Lat:24 SizeLat:24 for: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:43 Lat:85 SizeLat:85 for: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:134 CodeSize:30 Lat:67 SizeLat:67 for: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:17 Lat:53 SizeLat:53 for: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:43 Lat:85 SizeLat:85 for: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:134 CodeSize:30 Lat:67 SizeLat:67 for: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:17 Lat:53 SizeLat:53 for: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:240 CodeSize:85 Lat:169 SizeLat:169 for: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:406 CodeSize:59 Lat:133 SizeLat:133 for: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:378 CodeSize:33 Lat:105 SizeLat:105 for: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:314 CodeSize:85 Lat:169 SizeLat:169 for: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:250 CodeSize:33 Lat:105 SizeLat:105 for: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:276 CodeSize:58 Lat:131 SizeLat:131 for: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:250 CodeSize:33 Lat:105 SizeLat:105 for: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:276 CodeSize:58 Lat:131 SizeLat:131 for: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:250 CodeSize:33 Lat:105 SizeLat:105 for: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:276 CodeSize:58 Lat:131 SizeLat:131 for: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:250 CodeSize:33 Lat:105 SizeLat:105 for: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:404 CodeSize:59 Lat:133 SizeLat:133 for: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:376 CodeSize:33 Lat:105 SizeLat:105 for: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:626 CodeSize:169 Lat:337 SizeLat:337 for: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:498 CodeSize:65 Lat:209 SizeLat:209 for: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:548 CodeSize:114 Lat:259 SizeLat:259 for: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:498 CodeSize:65 Lat:209 SizeLat:209 for: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:548 CodeSize:114 Lat:259 SizeLat:259 for: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:498 CodeSize:65 Lat:209 SizeLat:209 for: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:548 CodeSize:115 Lat:261 SizeLat:261 for: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:496 CodeSize:65 Lat:209 SizeLat:209 for: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1360 CodeSize:117 Lat:265 SizeLat:265 for: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1304 CodeSize:65 Lat:209 SizeLat:209 for: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:922 CodeSize:169 Lat:337 SizeLat:337 for: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:794 CodeSize:65 Lat:209 SizeLat:209 for: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:844 CodeSize:114 Lat:259 SizeLat:259 for: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:794 CodeSize:65 Lat:209 SizeLat:209 for: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:844 CodeSize:114 Lat:259 SizeLat:259 for: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:794 CodeSize:65 Lat:209 SizeLat:209 for: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:844 CodeSize:115 Lat:261 SizeLat:261 for: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:792 CodeSize:65 Lat:209 SizeLat:209 for: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1352 CodeSize:117 Lat:265 SizeLat:265 for: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1296 CodeSize:65 Lat:209 SizeLat:209 for: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1834 CodeSize:337 Lat:673 SizeLat:673 for: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1578 CodeSize:129 Lat:417 SizeLat:417 for: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1676 CodeSize:226 Lat:515 SizeLat:515 for: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1578 CodeSize:129 Lat:417 SizeLat:417 for: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1676 CodeSize:227 Lat:517 SizeLat:517 for: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1576 CodeSize:129 Lat:417 SizeLat:417 for: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1672 CodeSize:229 Lat:521 SizeLat:521 for: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1568 CodeSize:129 Lat:417 SizeLat:417 for: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4912 CodeSize:233 Lat:529 SizeLat:529 for: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4800 CodeSize:129 Lat:417 SizeLat:417 for: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:3018 CodeSize:337 Lat:673 SizeLat:673 for: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2762 CodeSize:129 Lat:417 SizeLat:417 for: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2860 CodeSize:226 Lat:515 SizeLat:515 for: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2762 CodeSize:129 Lat:417 SizeLat:417 for: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2860 CodeSize:227 Lat:517 SizeLat:517 for: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2760 CodeSize:129 Lat:417 SizeLat:417 for: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2856 CodeSize:229 Lat:521 SizeLat:521 for: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2752 CodeSize:129 Lat:417 SizeLat:417 for: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4880 CodeSize:233 Lat:529 SizeLat:529 for: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4768 CodeSize:129 Lat:417 SizeLat:417 for: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'casts' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1104 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 762 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1192 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4488 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4400 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2698 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2474 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2540 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2474 for instruction: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2540 for instruction: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2472 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2536 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2464 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4560 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4480 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f32u32 = call i32 @llvm.fptoui.sat.i32.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %f32s64 = call i64 @llvm.fptosi.sat.i64.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 3 for: %f32u64 = call i64 @llvm.fptoui.sat.i64.f32(float undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64s1 = call i1 @llvm.fptosi.sat.i1.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64u1 = call i1 @llvm.fptoui.sat.i1.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64s8 = call i8 @llvm.fptosi.sat.i8.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64u8 = call i8 @llvm.fptoui.sat.i8.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64s16 = call i16 @llvm.fptosi.sat.i16.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64u16 = call i16 @llvm.fptoui.sat.i16.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f64s32 = call i32 @llvm.fptosi.sat.i32.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f64u32 = call i32 @llvm.fptoui.sat.i32.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64s64 = call i64 @llvm.fptosi.sat.i64.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:7 SizeLat:7 for: %f64u64 = call i64 @llvm.fptoui.sat.i64.f64(double undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:76 CodeSize:5 Lat:9 SizeLat:9 for: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:72 CodeSize:3 Lat:5 SizeLat:5 for: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:80 CodeSize:35 Lat:45 SizeLat:45 for: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:52 CodeSize:17 Lat:17 SizeLat:17 for: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:62 CodeSize:22 Lat:27 SizeLat:27 for: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:52 CodeSize:17 Lat:17 SizeLat:17 for: %v2f64u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:62 CodeSize:22 Lat:27 SizeLat:27 for: %v2f64s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:52 CodeSize:17 Lat:17 SizeLat:17 for: %v2f64u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:62 CodeSize:22 Lat:27 SizeLat:27 for: %v2f64s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:52 CodeSize:17 Lat:17 SizeLat:17 for: %v2f64u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:94 CodeSize:22 Lat:27 SizeLat:27 for: %v2f64s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:84 CodeSize:17 Lat:17 SizeLat:17 for: %v2f64u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:284 CodeSize:6 Lat:11 SizeLat:11 for: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:278 CodeSize:3 Lat:5 SizeLat:5 for: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:234 CodeSize:69 Lat:89 SizeLat:89 for: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:178 CodeSize:33 Lat:33 SizeLat:33 for: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:196 CodeSize:42 Lat:51 SizeLat:51 for: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:178 CodeSize:33 Lat:33 SizeLat:33 for: %v4f64u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:196 CodeSize:42 Lat:51 SizeLat:51 for: %v4f64s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:178 CodeSize:33 Lat:33 SizeLat:33 for: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:196 CodeSize:42 Lat:51 SizeLat:51 for: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:178 CodeSize:33 Lat:33 SizeLat:33 for: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:324 CodeSize:43 Lat:53 SizeLat:53 for: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:304 CodeSize:33 Lat:33 SizeLat:33 for: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1148 CodeSize:43 Lat:53 SizeLat:53 for: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1104 CodeSize:5 Lat:9 SizeLat:9 for: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:762 CodeSize:137 Lat:177 SizeLat:177 for: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:650 CodeSize:65 Lat:65 SizeLat:65 for: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:684 CodeSize:82 Lat:99 SizeLat:99 for: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:650 CodeSize:65 Lat:65 SizeLat:65 for: %v8f64u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:684 CodeSize:82 Lat:99 SizeLat:99 for: %v8f64s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:650 CodeSize:65 Lat:65 SizeLat:65 for: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:684 CodeSize:83 Lat:101 SizeLat:101 for: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:648 CodeSize:65 Lat:65 SizeLat:65 for: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1192 CodeSize:85 Lat:105 SizeLat:105 for: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1152 CodeSize:65 Lat:65 SizeLat:65 for: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:24 CodeSize:28 Lat:24 SizeLat:24 for: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4488 CodeSize:85 Lat:105 SizeLat:105 for: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4400 CodeSize:9 Lat:17 SizeLat:17 for: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2698 CodeSize:273 Lat:353 SizeLat:353 for: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2474 CodeSize:129 Lat:129 SizeLat:129 for: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2540 CodeSize:162 Lat:195 SizeLat:195 for: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2474 CodeSize:129 Lat:129 SizeLat:129 for: %v16f64u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2540 CodeSize:163 Lat:197 SizeLat:197 for: %v16f64s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2472 CodeSize:129 Lat:129 SizeLat:129 for: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2536 CodeSize:165 Lat:201 SizeLat:201 for: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2464 CodeSize:129 Lat:129 SizeLat:129 for: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4560 CodeSize:169 Lat:209 SizeLat:209 for: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4480 CodeSize:129 Lat:129 SizeLat:129 for: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) @@ -324,110 +324,110 @@ define void @casts() { define void @fp16() { ; CHECK-MVE-LABEL: 'fp16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 550 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1362 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1306 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1250 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 994 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 994 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 992 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1680 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4920 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4808 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:7 Lat:23 SizeLat:23 for: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:23 CodeSize:6 Lat:23 SizeLat:23 for: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:7 Lat:24 SizeLat:24 for: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:21 CodeSize:3 Lat:21 SizeLat:21 for: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:43 Lat:85 SizeLat:85 for: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:102 CodeSize:30 Lat:67 SizeLat:67 for: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:88 CodeSize:17 Lat:53 SizeLat:53 for: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:134 CodeSize:30 Lat:67 SizeLat:67 for: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:120 CodeSize:17 Lat:53 SizeLat:53 for: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:240 CodeSize:85 Lat:169 SizeLat:169 for: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:202 CodeSize:58 Lat:131 SizeLat:131 for: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:176 CodeSize:33 Lat:105 SizeLat:105 for: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:406 CodeSize:59 Lat:133 SizeLat:133 for: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:378 CodeSize:33 Lat:105 SizeLat:105 for: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:480 CodeSize:169 Lat:337 SizeLat:337 for: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:352 CodeSize:65 Lat:209 SizeLat:209 for: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:402 CodeSize:114 Lat:259 SizeLat:259 for: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:352 CodeSize:65 Lat:209 SizeLat:209 for: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:402 CodeSize:114 Lat:259 SizeLat:259 for: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:352 CodeSize:65 Lat:209 SizeLat:209 for: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:550 CodeSize:115 Lat:261 SizeLat:261 for: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:498 CodeSize:65 Lat:209 SizeLat:209 for: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1362 CodeSize:117 Lat:265 SizeLat:265 for: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1306 CodeSize:65 Lat:209 SizeLat:209 for: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1250 CodeSize:337 Lat:673 SizeLat:673 for: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:994 CodeSize:129 Lat:417 SizeLat:417 for: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1092 CodeSize:226 Lat:515 SizeLat:515 for: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:994 CodeSize:129 Lat:417 SizeLat:417 for: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1092 CodeSize:227 Lat:517 SizeLat:517 for: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:992 CodeSize:129 Lat:417 SizeLat:417 for: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1680 CodeSize:229 Lat:521 SizeLat:521 for: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1576 CodeSize:129 Lat:417 SizeLat:417 for: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4920 CodeSize:233 Lat:529 SizeLat:529 for: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4808 CodeSize:129 Lat:417 SizeLat:417 for: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'fp16' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1102 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4484 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4400 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:5 CodeSize:7 Lat:5 SizeLat:5 for: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 3 for: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:44 CodeSize:5 Lat:9 SizeLat:9 for: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:40 CodeSize:3 Lat:5 SizeLat:5 for: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:76 CodeSize:5 Lat:9 SizeLat:9 for: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:72 CodeSize:3 Lat:5 SizeLat:5 for: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:9 SizeLat:9 for: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:5 SizeLat:5 for: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:284 CodeSize:6 Lat:11 SizeLat:11 for: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:278 CodeSize:3 Lat:5 SizeLat:5 for: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:6 CodeSize:7 Lat:6 SizeLat:6 for: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:20 CodeSize:6 Lat:11 SizeLat:11 for: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:14 CodeSize:3 Lat:5 SizeLat:5 for: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1112 CodeSize:8 Lat:15 SizeLat:15 for: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:1102 CodeSize:3 Lat:5 SizeLat:5 for: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:12 CodeSize:14 Lat:12 SizeLat:12 for: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:124 CodeSize:75 Lat:85 SizeLat:85 for: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:48 CodeSize:5 Lat:9 SizeLat:9 for: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4484 CodeSize:79 Lat:93 SizeLat:93 for: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:4400 CodeSize:5 Lat:9 SizeLat:9 for: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll index 51e87b5..cd5c8c5 100644 --- a/llvm/test/Analysis/CostModel/ARM/freeshift.ll +++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @shl(i32 %a, i32 %b) { ; CHECK-LABEL: 'shl' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = shl i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 0 for: %as = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ss = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found costs of 0 for: %xs = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ns = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found costs of 0 for: %os = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %oc = or i32 %b, %os +; CHECK-NEXT: Cost Model: Found costs of 0 for: %is = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret void ; %as = shl i32 %a, 3 %ac = add i32 %b, %as @@ -36,19 +36,19 @@ define void @shl(i32 %a, i32 %b) { define void @ashr(i32 %a, i32 %b) { ; CHECK-LABEL: 'ashr' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = ashr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 0 for: %as = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ss = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found costs of 0 for: %xs = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ns = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found costs of 0 for: %os = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %oc = or i32 %b, %os +; CHECK-NEXT: Cost Model: Found costs of 0 for: %is = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret void ; %as = ashr i32 %a, 3 %ac = add i32 %b, %as @@ -67,19 +67,19 @@ define void @ashr(i32 %a, i32 %b) { define void @lshr(i32 %a, i32 %b) { ; CHECK-LABEL: 'lshr' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = lshr i32 %a, 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found costs of 0 for: %as = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ac = add i32 %b, %as +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ss = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %sc = sub i32 %b, %ss +; CHECK-NEXT: Cost Model: Found costs of 0 for: %xs = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %xc = xor i32 %b, %xs +; CHECK-NEXT: Cost Model: Found costs of 0 for: %ns = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %nc = and i32 %b, %ns +; CHECK-NEXT: Cost Model: Found costs of 0 for: %os = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %oc = or i32 %b, %os +; CHECK-NEXT: Cost Model: Found costs of 0 for: %is = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ic = icmp eq i32 %b, %is +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret void ; %as = lshr i32 %a, 3 %ac = add i32 %b, %as diff --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll index 48de193..cce87a5 100644 --- a/llvm/test/Analysis/CostModel/ARM/gep.ll +++ b/llvm/test/Analysis/CostModel/ARM/gep.ll @@ -1,98 +1,98 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V6M -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-V7M-NOFP -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-V7M-FP -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-T32 -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-A32 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V6M +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-V7M-NOFP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-V7M-FP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-T32 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-A32 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @testi8(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testi8' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testi8' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testi8' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testi8' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testi8' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testi8' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testi8' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i8, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i8, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i8, ptr %a, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i8, ptr %a, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i8, ptr %a, i32 -255 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i8, ptr %a, i32 -256 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i8, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds i8, ptr %a, i32 1 %am4 = getelementptr inbounds i8, ptr %a, i32 -1 @@ -109,88 +109,88 @@ define void @testi8(ptr %a, i32 %i) { define void @testi16(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testi16' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testi16' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testi16' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testi16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testi16' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testi16' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testi16' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i16, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i16, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i16, ptr %a, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i16, ptr %a, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i16, ptr %a, i32 -127 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i16, ptr %a, i32 -128 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i16, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds i16, ptr %a, i32 1 %am4 = getelementptr inbounds i16, ptr %a, i32 -1 @@ -207,88 +207,88 @@ define void @testi16(ptr %a, i32 %i) { define void @testi32(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testi32' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testi32' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testi32' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testi32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testi32' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testi32' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testi32' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i32, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i32, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i32, ptr %a, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i32, ptr %a, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i32, ptr %a, i32 -63 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i32, ptr %a, i32 -64 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %ai = getelementptr inbounds i32, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds i32, ptr %a, i32 1 %am4 = getelementptr inbounds i32, ptr %a, i32 -1 @@ -305,102 +305,102 @@ define void @testi32(ptr %a, i32 %i) { define void @testi64(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testi64' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testi64' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testi64' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testi64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testi64' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testi64' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testi64' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds i64, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am4 = getelementptr inbounds i64, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a15 = getelementptr inbounds i64, ptr %a, i32 15 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a16 = getelementptr inbounds i64, ptr %a, i32 16 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a31 = getelementptr inbounds i64, ptr %a, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a32 = getelementptr inbounds i64, ptr %a, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds i64, ptr %a, i32 -63 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds i64, ptr %a, i32 -64 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds i64, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds i64, ptr %a, i32 1 %am4 = getelementptr inbounds i64, ptr %a, i32 -1 @@ -419,102 +419,102 @@ define void @testi64(ptr %a, i32 %i) { define void @testhalf(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testhalf' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testhalf' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testhalf' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testhalf' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testhalf' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testhalf' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testhalf' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1 = getelementptr inbounds half, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds half, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds half, ptr %a, i32 255 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds half, ptr %a, i32 256 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds half, ptr %a, i32 -255 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds half, ptr %a, i32 -256 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds half, ptr %a, i32 1023 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds half, ptr %a, i32 1024 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds half, ptr %a, i32 -63 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds half, ptr %a, i32 -64 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds half, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds half, ptr %a, i32 1 %am1 = getelementptr inbounds half, ptr %a, i32 -1 @@ -533,102 +533,102 @@ define void @testhalf(ptr %a, i32 %i) { define void @testfloat(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testfloat' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testfloat' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testfloat' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testfloat' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testfloat' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testfloat' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testfloat' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds float, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds float, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds float, ptr %a, i32 255 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds float, ptr %a, i32 256 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds float, ptr %a, i32 -255 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds float, ptr %a, i32 -256 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds float, ptr %a, i32 1023 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds float, ptr %a, i32 1024 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds float, ptr %a, i32 -63 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds float, ptr %a, i32 -64 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds float, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds float, ptr %a, i32 1 %am1 = getelementptr inbounds float, ptr %a, i32 -1 @@ -647,102 +647,102 @@ define void @testfloat(ptr %a, i32 %i) { define void @testdouble(ptr %a, i32 %i) { ; CHECK-V6M-LABEL: 'testdouble' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testdouble' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testdouble' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testdouble' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testdouble' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testdouble' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testdouble' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a1 = getelementptr inbounds double, ptr %a, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am1 = getelementptr inbounds double, ptr %a, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %a255 = getelementptr inbounds double, ptr %a, i32 127 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a256 = getelementptr inbounds double, ptr %a, i32 128 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am255 = getelementptr inbounds double, ptr %a, i32 -127 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %am256 = getelementptr inbounds double, ptr %a, i32 -128 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1023 = getelementptr inbounds double, ptr %a, i32 511 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %a1024 = getelementptr inbounds double, ptr %a, i32 512 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am63 = getelementptr inbounds double, ptr %a, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %am64 = getelementptr inbounds double, ptr %a, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %ai = getelementptr inbounds double, ptr %a, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a1 = getelementptr inbounds double, ptr %a, i32 1 %am1 = getelementptr inbounds double, ptr %a, i32 -1 @@ -761,375 +761,375 @@ define void @testdouble(ptr %a, i32 %i) { define void @testvecs(i32 %i) { ; CHECK-V6M-LABEL: 'testvecs' -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-V6M-NEXT: Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-V6M-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-NOFP-LABEL: 'testvecs' -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-V7M-FP-LABEL: 'testvecs' -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'testvecs' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-MVEFP-LABEL: 'testvecs' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-T32-LABEL: 'testvecs' -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-T32-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-T32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-A32-LABEL: 'testvecs' -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32 +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 1 for: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i +; CHECK-A32-NEXT: Cost Model: Found costs of 0 for: %d0 = getelementptr inbounds i8, ptr undef, i32 -1 +; CHECK-A32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1 diff --git a/llvm/test/Analysis/CostModel/ARM/immediates.ll b/llvm/test/Analysis/CostModel/ARM/immediates.ll index ed13636..cd42313 100644 --- a/llvm/test/Analysis/CostModel/ARM/immediates.ll +++ b/llvm/test/Analysis/CostModel/ARM/immediates.ll @@ -1,145 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-LATENCY -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-LATENCY -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-THROUGHPUT +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1 +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @const_costs() { -; CHECK-T1-SIZE-LABEL: 'const_costs' -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 +; CHECK-T1-LABEL: 'const_costs' +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %add_1 = add i32 undef, 1 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %add_32767 = add i32 undef, 32767 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %sub_1 = sub i32 undef, 1 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %sub_32768 = sub i32 undef, 32768 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %mul_2 = mul i32 undef, 2 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %mul_3 = mul i32 undef, 3 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %mul_27 = mul i32 undef, 27 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %and_255 = and i32 undef, 255 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %and_65535 = and i32 undef, 65535 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %and_1 = and i32 undef, 1 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %xor_1 = xor i32 undef, 1 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %xor_7 = xor i32 undef, 7 +; CHECK-T1-NEXT: Cost Model: Found costs of 0 for: %gep_1 = getelementptr i32, ptr undef, i32 1 +; CHECK-T1-NEXT: Cost Model: Found costs of 0 for: %gep_16 = getelementptr i32, ptr undef, i32 16 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %cmp_244 = icmp ne i32 undef, 244 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %cmp_256 = icmp uge i32 undef, 256 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: %cmp_1024 = icmp ult i32 undef, 1024 +; CHECK-T1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_1_0 = select i1 undef, i32 1, i32 0 +; CHECK-T1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_7_255 = select i1 undef, i32 7, i32 255 +; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: ret i32 1 ; -; CHECK-T2-SIZE-LABEL: 'const_costs' -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 -; -; CHECK-T1-LATENCY-LABEL: 'const_costs' -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 -; -; CHECK-T2-LATENCY-LABEL: 'const_costs' -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 -; -; CHECK-T1-THROUGHPUT-LABEL: 'const_costs' -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 -; -; CHECK-T2-THROUGHPUT-LABEL: 'const_costs' -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 -; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 +; CHECK-T2-LABEL: 'const_costs' +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %add_1 = add i32 undef, 1 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %add_32767 = add i32 undef, 32767 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %sub_1 = sub i32 undef, 1 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %sub_32768 = sub i32 undef, 32768 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %mul_2 = mul i32 undef, 2 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %mul_3 = mul i32 undef, 3 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %mul_27 = mul i32 undef, 27 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %and_255 = and i32 undef, 255 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %and_65535 = and i32 undef, 65535 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %and_1 = and i32 undef, 1 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %xor_1 = xor i32 undef, 1 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %xor_7 = xor i32 undef, 7 +; CHECK-T2-NEXT: Cost Model: Found costs of 0 for: %gep_1 = getelementptr i32, ptr undef, i32 1 +; CHECK-T2-NEXT: Cost Model: Found costs of 0 for: %gep_16 = getelementptr i32, ptr undef, i32 16 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %cmp_244 = icmp ne i32 undef, 244 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %cmp_256 = icmp uge i32 undef, 256 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: %cmp_1024 = icmp ult i32 undef, 1024 +; CHECK-T2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_1_0 = select i1 undef, i32 1, i32 0 +; CHECK-T2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %select_7_255 = select i1 undef, i32 7, i32 255 +; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: ret i32 1 ; %add_1 = add i32 undef, 1 %add_32767 = add i32 undef, 32767 diff --git a/llvm/test/Analysis/CostModel/ARM/insertelement.ll b/llvm/test/Analysis/CostModel/ARM/insertelement.ll index 5a922dd..f14b200 100644 --- a/llvm/test/Analysis/CostModel/ARM/insertelement.ll +++ b/llvm/test/Analysis/CostModel/ARM/insertelement.ll @@ -1,4 +1,5 @@ -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios6.0.0" @@ -7,12 +8,16 @@ target triple = "thumbv7-apple-ios6.0.0" ; due to renaming constraints. %T_i8v = type <8 x i8> %T_i8 = type i8 -; CHECK: insertelement_i8 -define void @insertelement_i8(ptr %saddr, - ptr %vaddr) { +define void @insertelement_i8(ptr %saddr, ptr %vaddr) { +; CHECK-LABEL: 'insertelement_i8' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <8 x i8>, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i8, ptr %saddr, align 1 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <8 x i8> %v0, i8 %v1, i32 1 +; CHECK-NEXT: Cost Model: Found costs of 1 for: store <8 x i8> %v2, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T_i8v, ptr %vaddr %v1 = load %T_i8, ptr %saddr -;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8> %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1 store %T_i8v %v2, ptr %vaddr ret void @@ -21,12 +26,16 @@ define void @insertelement_i8(ptr %saddr, %T_i16v = type <4 x i16> %T_i16 = type i16 -; CHECK: insertelement_i16 -define void @insertelement_i16(ptr %saddr, - ptr %vaddr) { +define void @insertelement_i16(ptr %saddr, ptr %vaddr) { +; CHECK-LABEL: 'insertelement_i16' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i16, ptr %saddr, align 2 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <4 x i16> %v0, i16 %v1, i32 1 +; CHECK-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %v2, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T_i16v, ptr %vaddr %v1 = load %T_i16, ptr %saddr -;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16> %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1 store %T_i16v %v2, ptr %vaddr ret void @@ -34,12 +43,16 @@ define void @insertelement_i16(ptr %saddr, %T_i32v = type <2 x i32> %T_i32 = type i32 -; CHECK: insertelement_i32 -define void @insertelement_i32(ptr %saddr, - ptr %vaddr) { +define void @insertelement_i32(ptr %saddr, ptr %vaddr) { +; CHECK-LABEL: 'insertelement_i32' +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load i32, ptr %saddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2 = insertelement <2 x i32> %v0, i32 %v1, i32 1 +; CHECK-NEXT: Cost Model: Found costs of 1 for: store <2 x i32> %v2, ptr %vaddr, align 4 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; %v0 = load %T_i32v, ptr %vaddr %v1 = load %T_i32, ptr %saddr -;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32> %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1 store %T_i32v %v2, ptr %vaddr ret void diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll deleted file mode 100644 index 6377437..0000000 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ /dev/null @@ -1,371 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput < %s | FileCheck %s --check-prefix=THRU -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency < %s | FileCheck %s --check-prefix=LATE -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE - -target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - -; Test a cross-section of intrinsics for various cost-kinds. -; Other test files may check for accuracy of a particular intrinsic -; across subtargets or types. This is just a basic correctness check using an -; ARM target and a legal scalar type (i32/float) and/or an -; illegal vector type (16 x i32/float). - -declare i32 @llvm.smax.i32(i32, i32) -declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) - -declare float @llvm.fmuladd.f32(float, float, float) -declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) - -declare float @llvm.log2.f32(float) -declare <16 x float> @llvm.log2.v16f32(<16 x float>) - -declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) -declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) - -declare float @llvm.maximum.f32(float, float) -declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) - -declare i32 @llvm.cttz.i32(i32, i1) -declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) - -declare i32 @llvm.ctlz.i32(i32, i1) -declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) - -declare i32 @llvm.fshl.i32(i32, i32, i32) -declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) - -declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) -declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) -declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) - -declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) - -declare i32 @llvm.ssa.copy.i32(i32) -declare float @llvm.ssa.copy.f32(float) -declare ptr @llvm.ssa.copy.p0(ptr) - -define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) { -; THRU-LABEL: 'smax' -; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'smax' -; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'smax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'smax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) - %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) - ret void -} - -define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) { -; THRU-LABEL: 'fmuladd' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'fmuladd' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'fmuladd' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) - ret void -} - -define void @log2(float %a, <16 x float> %va) { -; THRU-LABEL: 'log2' -; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) -; THRU-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'log2' -; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) -; LATE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'log2' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a) -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'log2' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call float @llvm.log2.f32(float %a) - %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) - ret void -} - -define void @constrained_fadd(float %a, <16 x float> %va) strictfp { -; THRU-LABEL: 'constrained_fadd' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; THRU-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'constrained_fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'constrained_fadd' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'constrained_fadd' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") - %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") - ret void -} - -define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { -; THRU-LABEL: 'fmaximum' -; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; THRU-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'fmaximum' -; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'fmaximum' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'fmaximum' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call float @llvm.maximum.f32(float %a, float %b) - %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) - ret void -} - -define void @cttz(i32 %a, <16 x i32> %va) { -; THRU-LABEL: 'cttz' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'cttz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'cttz' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'cttz' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) - %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) - ret void -} - -define void @ctlz(i32 %a, <16 x i32> %va) { -; THRU-LABEL: 'ctlz' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'ctlz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'ctlz' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'ctlz' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) - %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) - ret void -} - -define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) { -; THRU-LABEL: 'fshl' -; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; THRU-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'fshl' -; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'fshl' -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'fshl' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) - ret void -} - -define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) { -; THRU-LABEL: 'maskedgather' -; THRU-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> align 1 %va, <16 x i1> %vb, <16 x float> %vc) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'maskedgather' -; LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> align 1 %va, <16 x i1> %vb, <16 x float> %vc) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'maskedgather' -; SIZE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> align 1 %va, <16 x i1> %vb, <16 x float> %vc) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'maskedgather' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> align 1 %va, <16 x i1> %vb, <16 x float> %vc) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) - ret void -} - -define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) { -; THRU-LABEL: 'maskedscatter' -; THRU-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> align 1 %vb, <16 x i1> %vc) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'maskedscatter' -; LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> align 1 %vb, <16 x i1> %vc) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'maskedscatter' -; SIZE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> align 1 %vb, <16 x i1> %vc) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'maskedscatter' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> align 1 %vb, <16 x i1> %vc) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc) - ret void -} - -define void @reduce_fmax(<16 x float> %va) { -; THRU-LABEL: 'reduce_fmax' -; THRU-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'reduce_fmax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'reduce_fmax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) - ret void -} - -define void @memcpy(ptr %a, ptr %b, i32 %c) { -; THRU-LABEL: 'memcpy' -; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'memcpy' -; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'memcpy' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'memcpy' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) - ret void -} - -define void @ssa_copy() { - ; CHECK: %{{.*}} = llvm.intr.ssa.copy %{{.*}} : f32 -; THRU-LABEL: 'ssa_copy' -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i = call i32 @llvm.ssa.copy.i32(i32 undef) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = call float @llvm.ssa.copy.f32(float undef) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p = call ptr @llvm.ssa.copy.p0(ptr undef) -; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; LATE-LABEL: 'ssa_copy' -; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i = call i32 @llvm.ssa.copy.i32(i32 undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = call float @llvm.ssa.copy.f32(float undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p = call ptr @llvm.ssa.copy.p0(ptr undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE-LABEL: 'ssa_copy' -; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i = call i32 @llvm.ssa.copy.i32(i32 undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = call float @llvm.ssa.copy.f32(float undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p = call ptr @llvm.ssa.copy.p0(ptr undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; SIZE_LATE-LABEL: 'ssa_copy' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i = call i32 @llvm.ssa.copy.i32(i32 undef) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = call float @llvm.ssa.copy.f32(float undef) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p = call ptr @llvm.ssa.copy.p0(ptr undef) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %i = call i32 @llvm.ssa.copy.i32(i32 undef) - %f = call float @llvm.ssa.copy.f32(float undef) - %p = call ptr @llvm.ssa.copy.p0(ptr undef) - ret void -} diff --git a/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll b/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll index 4404209..c98601f 100644 --- a/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll +++ b/llvm/test/Analysis/CostModel/ARM/load-to-trunc.ll @@ -1,17 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s + ; Check memory cost model action for a load of an unusually sized integer ; follow by and a trunc to a register sized integer gives a cost of 1 rather ; than the expanded cost if it is not. Currently, this target does not have ; that expansion. -; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK - ; Check that cost is 1 for unusual load to register sized load. define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) { ; CHECK-LABEL: 'loadUnusualIntegerWithTrunc' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %out = load i128, ptr %ptr, align 8 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %trunc = trunc i128 %out to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i32 %trunc ; %out = load i128, ptr %ptr %trunc = trunc i128 %out to i32 @@ -20,8 +20,8 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) { define i128 @loadUnusualInteger(ptr %ptr) { ; CHECK-LABEL: 'loadUnusualInteger' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %out = load i128, ptr %ptr, align 8 +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i128 %out ; %out = load i128, ptr %ptr ret i128 %out diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll index 4c322e9..dd2eaae 100644 --- a/llvm/test/Analysis/CostModel/ARM/load_store.ll +++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll @@ -1,171 +1,117 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-NOVEC -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-NEON -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-NEON -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=CHECK-V8-SIZE -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECK-NOVEC +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-NOVEC +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-FP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=arm-none-eabi -mcpu=cortex-a53 < %s | FileCheck %s --check-prefix=CHECK-NEON target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @stores() { ; CHECK-NOVEC-LABEL: 'stores' -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store double undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store double undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-FP-LABEL: 'stores' -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'stores' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i32> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x float> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i64> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x float> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'stores' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-V8-SIZE-LABEL: 'stores' -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'stores' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store i8 undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store i16 undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store i32 undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store i64 undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store i128 undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store float undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store double undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i8> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x i16> undef, ptr undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <2 x i32> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <8 x i16> undef, ptr undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <16 x i8> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <4 x float> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x double> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <2 x float> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <8 x i16> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: store <4 x float> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <2 x double> undef, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; store i8 undef, ptr undef, align 4 store i16 undef, ptr undef, align 4 @@ -199,160 +145,108 @@ define void @stores() { define void @loads() { ; CHECK-NOVEC-LABEL: 'loads' -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1 +; CHECK-NOVEC-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-FP-LABEL: 'loads' -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1 +; CHECK-FP-NEXT: Cost Model: Found costs of 1 for: ret void ; ; CHECK-MVE-LABEL: 'loads' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'loads' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-V8-SIZE-LABEL: 'loads' -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'loads' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1 -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %1 = load i8, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %2 = load i16, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %3 = load i32, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %4 = load i64, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %5 = load i128, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %6 = load float, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %7 = load double, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %8 = load <2 x i8>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %9 = load <2 x i16>, ptr undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %10 = load <2 x i32>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %11 = load <2 x i64>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %12 = load <4 x i32>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %13 = load <8 x i16>, ptr undef, align 2 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %14 = load <16 x i8>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %15 = load <4 x float>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %16 = load <4 x double>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %17 = load <2 x float>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %18 = load <2 x double>, ptr undef, align 4 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %19 = load <2 x i64>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %20 = load <4 x i32>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %21 = load <8 x i16>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %22 = load <4 x float>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %23 = load <2 x double>, ptr undef, align 1 +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; load i8, ptr undef, align 4 load i16, ptr undef, align 4 diff --git a/llvm/test/Analysis/CostModel/ARM/logicalop.ll b/llvm/test/Analysis/CostModel/ARM/logicalop.ll index 967426c..82eb716 100644 --- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll +++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll @@ -1,72 +1,40 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1 +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @op() { ; Logical and/or - select's cost must be equivalent to that of binop -; CHECK-MVE-RECIP-LABEL: 'op' -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-NEON-RECIP-LABEL: 'op' -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-THUMB1-RECIP-LABEL: 'op' -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-RECIP-LABEL: 'op' -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or i1 undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'op' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-NEON-SIZE-LABEL: 'op' -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB1-SIZE-LABEL: 'op' -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-SIZE-LABEL: 'op' -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select i1 undef, i1 undef, i1 false -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and i1 undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sor = select i1 undef, i1 true, i1 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %bor = or i1 undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-LABEL: 'op' +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-NEON-LABEL: 'op' +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-THUMB1-LABEL: 'op' +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 1 for: ret void +; +; CHECK-THUMB2-LABEL: 'op' +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %sand = select i1 undef, i1 undef, i1 false +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %band = and i1 undef, undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %sor = select i1 undef, i1 true, i1 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %bor = or i1 undef, undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 1 for: ret void ; %sand = select i1 undef, i1 undef, i1 false %band = and i1 undef, undef @@ -77,61 +45,33 @@ define void @op() { } define void @vecop() { -; CHECK-MVE-RECIP-LABEL: 'vecop' -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-NEON-RECIP-LABEL: 'vecop' -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-THUMB1-RECIP-LABEL: 'vecop' -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-RECIP-LABEL: 'vecop' -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'vecop' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-NEON-SIZE-LABEL: 'vecop' -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB1-SIZE-LABEL: 'vecop' -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-SIZE-LABEL: 'vecop' -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-LABEL: 'vecop' +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %band = and <4 x i1> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %bor = or <4 x i1> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-NEON-LABEL: 'vecop' +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %band = and <4 x i1> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %bor = or <4 x i1> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-THUMB1-LABEL: 'vecop' +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 4 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 4 for: %band = and <4 x i1> undef, undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 4 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 4 for: %bor = or <4 x i1> undef, undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 1 for: ret void +; +; CHECK-THUMB2-LABEL: 'vecop' +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 4 for: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 4 for: %band = and <4 x i1> undef, undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 4 for: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 4 for: %bor = or <4 x i1> undef, undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 1 for: ret void ; %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false> %band = and <4 x i1> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll index 17d4263..07d2bd0 100644 --- a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s + ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf" define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'direct' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 @@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = mul <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = mul <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'dn3216' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = mul <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll index 7de2799..3ae02cd 100644 --- a/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll +++ b/llvm/test/Analysis/CostModel/ARM/muls-in-smlal-patterns.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP define i64 @test(i16 %a, i16 %b) { ; CHECK-LABEL: 'test' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; ; CHECK-NO-DSP-LABEL: 'test' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; %as = sext i16 %a to i32 %bs = sext i16 %b to i32 @@ -26,20 +26,20 @@ define i64 @test(i16 %a, i16 %b) { define i64 @withadd(i16 %a, i16 %b, i64 %c) { ; CHECK-LABEL: 'withadd' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; ; CHECK-NO-DSP-LABEL: 'withadd' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = sext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %bs = sext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; %as = sext i16 %a to i32 %bs = sext i16 %b to i32 @@ -51,24 +51,24 @@ define i64 @withadd(i16 %a, i16 %b, i64 %c) { define i64 @withloads(ptr %pa, ptr %pb, i64 %c) { ; CHECK-LABEL: 'withloads' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2 +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %as = sext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %bs = sext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; ; CHECK-NO-DSP-LABEL: 'withloads' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = sext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = sext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 0 for: %as = sext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 0 for: %bs = sext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; %a = load i16, ptr %pa %b = load i16, ptr %pb @@ -82,18 +82,18 @@ define i64 @withloads(ptr %pa, ptr %pb, i64 %c) { define i64 @different_extend_ops(i16 %a, i16 %b) { ; CHECK-LABEL: 'different_extend_ops' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; ; CHECK-NO-DSP-LABEL: 'different_extend_ops' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = sext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = sext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %as = sext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = sext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; %as = sext i16 %a to i32 %bs = zext i16 %b to i32 diff --git a/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll index 521816d13..04a9520 100644 --- a/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll +++ b/llvm/test/Analysis/CostModel/ARM/muls-in-umull-patterns.ll @@ -1,20 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main -mattr=+dsp < %s | FileCheck %s +; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple thumbv8.1-m.main < %s | FileCheck %s --check-prefix=CHECK-NO-DSP + define i64 @test(i16 %a, i16 %b) { ; CHECK-LABEL: 'test' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: %as = zext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; ; CHECK-NO-DSP-LABEL: 'test' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %as = zext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %ms ; %as = zext i16 %a to i32 %bs = zext i16 %b to i32 @@ -25,20 +26,20 @@ define i64 @test(i16 %a, i16 %b) { define i64 @withadd(i16 %a, i16 %b, i64 %c) { ; CHECK-LABEL: 'withadd' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NEXT: Cost Model: Found costs of 1 for: %as = zext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; ; CHECK-NO-DSP-LABEL: 'withadd' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = zext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bs = zext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %as = zext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %bs = zext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; %as = zext i16 %a to i32 %bs = zext i16 %b to i32 @@ -50,24 +51,24 @@ define i64 @withadd(i16 %a, i16 %b, i64 %c) { define i64 @withloads(ptr %pa, ptr %pb, i64 %c) { ; CHECK-LABEL: 'withloads' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %m = mul i32 %as, %bs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2 +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %as = zext i16 %a to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %bs = zext i16 %b to i32 +; CHECK-NEXT: Cost Model: Found costs of 0 for: %m = mul i32 %as, %bs +; CHECK-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; ; CHECK-NO-DSP-LABEL: 'withloads' -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %pa, align 2 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = load i16, ptr %pb, align 2 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = zext i16 %a to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bs = zext i16 %b to i32 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %m = mul i32 %as, %bs -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ms = zext i32 %m to i64 -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = add i64 %c, %ms -; CHECK-NO-DSP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %a = load i16, ptr %pa, align 2 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %b = load i16, ptr %pb, align 2 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 0 for: %as = zext i16 %a to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 0 for: %bs = zext i16 %b to i32 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %m = mul i32 %as, %bs +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: %ms = zext i32 %m to i64 +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 2 for: %r = add i64 %c, %ms +; CHECK-NO-DSP-NEXT: Cost Model: Found costs of 1 for: ret i64 %r ; %a = load i16, ptr %pa %b = load i16, ptr %pb diff --git a/llvm/test/Analysis/CostModel/ARM/mve-intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/mve-intrinsic-cost-kinds.ll new file mode 100644 index 0000000..b3ad818 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/mve-intrinsic-cost-kinds.ll @@ -0,0 +1,181 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve.fp -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +; Test a cross-section of intrinsics for various cost-kinds. +; Other test files may check for accuracy of a particular intrinsic +; across subtargets or types. This is just a basic correctness check using an +; ARM target and a legal scalar type (i32/float) and/or an +; illegal vector type (16 x i32/float). + +declare i32 @llvm.smax.i32(i32, i32) +declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) + +declare float @llvm.fmuladd.f32(float, float, float) +declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) + +declare float @llvm.log2.f32(float) +declare <16 x float> @llvm.log2.v16f32(<16 x float>) + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) + +declare float @llvm.maximum.f32(float, float) +declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) + +declare i32 @llvm.cttz.i32(i32, i1) +declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) + +declare i32 @llvm.ctlz.i32(i32, i1) +declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) + +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) + +declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) + +declare i32 @llvm.ssa.copy.i32(i32) +declare float @llvm.ssa.copy.f32(float) +declare ptr @llvm.ssa.copy.p0(ptr) + +define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) { +; CHECK-LABEL: 'smax' +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret void +} + +define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) { +; CHECK-LABEL: 'fmuladd' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) + ret void +} + +define void @log2(float %a, <16 x float> %va) { +; CHECK-LABEL: 'log2' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %s = call float @llvm.log2.f32(float %a) +; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:48 Lat:192 SizeLat:192 for: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call float @llvm.log2.f32(float %a) + %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) + ret void +} + +define void @constrained_fadd(float %a, <16 x float> %va) strictfp { +; CHECK-LABEL: 'constrained_fadd' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; CHECK-NEXT: Cost Model: Found costs of 48 for: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") + %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret void +} + +define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { +; CHECK-LABEL: 'fmaximum' +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %s = call float @llvm.maximum.f32(float %a, float %b) +; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:64 Lat:208 SizeLat:208 for: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call float @llvm.maximum.f32(float %a, float %b) + %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) + ret void +} + +define void @cttz(i32 %a, <16 x i32> %va) { +; CHECK-LABEL: 'cttz' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) + %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) + ret void +} + +define void @ctlz(i32 %a, <16 x i32> %va) { +; CHECK-LABEL: 'ctlz' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; CHECK-NEXT: Cost Model: Found costs of 8 for: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) + %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) + ret void +} + +define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) { +; CHECK-LABEL: 'fshl' +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found costs of RThru:120 CodeSize:92 Lat:120 SizeLat:120 for: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) + %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) + ret void +} + +define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) { +; CHECK-LABEL: 'maskedgather' +; CHECK-NEXT: Cost Model: Found costs of 176 for: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> align 1 %va, <16 x i1> %vb, <16 x float> %vc) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) + ret void +} + +define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) { +; CHECK-LABEL: 'maskedscatter' +; CHECK-NEXT: Cost Model: Found costs of 176 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> align 1 %vb, <16 x i1> %vc) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc) + ret void +} + +define void @reduce_fmax(<16 x float> %va) { +; CHECK-LABEL: 'reduce_fmax' +; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:9 for: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) + ret void +} + +define void @memcpy(ptr %a, ptr %b, i32 %c) { +; CHECK-LABEL: 'memcpy' +; CHECK-NEXT: Cost Model: Found costs of 4 for: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false) + ret void +} + +define void @ssa_copy() { +; CHECK-LABEL: 'ssa_copy' +; CHECK-NEXT: Cost Model: Found costs of 0 for: %i = call i32 @llvm.ssa.copy.i32(i32 undef) +; CHECK-NEXT: Cost Model: Found costs of 0 for: %f = call float @llvm.ssa.copy.f32(float undef) +; CHECK-NEXT: Cost Model: Found costs of 0 for: %p = call ptr @llvm.ssa.copy.p0(ptr undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %i = call i32 @llvm.ssa.copy.i32(i32 undef) + %f = call float @llvm.ssa.copy.f32(float undef) + %p = call ptr @llvm.ssa.copy.p0(ptr undef) + ret void +} diff --git a/llvm/test/Analysis/CostModel/ARM/mve-target-intrinsics.ll b/llvm/test/Analysis/CostModel/ARM/mve-target-intrinsics.ll new file mode 100644 index 0000000..d09216a --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/mve-target-intrinsics.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define void @intrinsics() { +; CHECK-LABEL: 'intrinsics' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found costs of 1 for: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) +; CHECK-NEXT: Cost Model: Found costs of 1 for: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) +; CHECK-NEXT: Cost Model: Found costs of 1 for: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) + %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) + %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) + %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) + ret void +} + +declare i32 @llvm.arm.ssat(i32, i32) +declare { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr) +declare { i32, i32 } @llvm.arm.mve.sqrshrl(i32, i32, i32, i32) +declare { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32, i32, i32, i32, i32, <8 x i16>, <8 x i16>) diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index f626901..de429fd2 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -1,272 +1,140 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1-SIZE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-THUMB1 +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-THUMB2 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @selects() { ; Scalar values -; CHECK-MVE-RECIP-LABEL: 'selects' -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-LABEL: 'selects' +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v6 = select i1 undef, double undef, double undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; -; CHECK-NEON-RECIP-LABEL: 'selects' -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-LABEL: 'selects' +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v6 = select i1 undef, double undef, double undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 19 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 50 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 100 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; -; CHECK-THUMB1-RECIP-LABEL: 'selects' -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-THUMB1-LABEL: 'selects' +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:6 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:20 SizeLat:20 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-NEXT: Cost Model: Found costs of 1 for: ret void ; -; CHECK-THUMB2-RECIP-LABEL: 'selects' -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-MVE-SIZE-LABEL: 'selects' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-NEON-SIZE-LABEL: 'selects' -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB1-SIZE-LABEL: 'selects' -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-SIZE-LABEL: 'selects' -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = select i1 undef, i1 undef, i1 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = select i1 undef, i8 undef, i8 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = select i1 undef, i16 undef, i16 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3 = select i1 undef, i32 undef, i32 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-THUMB2-LABEL: 'selects' +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:3 Lat:1 SizeLat:1 for: %v0 = select i1 undef, i1 undef, i1 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v1 = select i1 undef, i8 undef, i8 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v2 = select i1 undef, i16 undef, i16 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v3 = select i1 undef, i32 undef, i32 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v4 = select i1 undef, i64 undef, i64 undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %v5 = select i1 undef, float undef, float undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v6 = select i1 undef, double undef, double undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:24 Lat:16 SizeLat:16 for: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:48 Lat:32 SizeLat:32 for: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:32 SizeLat:32 for: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:64 SizeLat:64 for: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:6 Lat:4 SizeLat:4 for: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:8 SizeLat:8 for: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:9 Lat:6 SizeLat:6 for: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:20 SizeLat:20 for: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-NEXT: Cost Model: Found costs of 1 for: ret void ; %v0 = select i1 undef, i1 undef, i1 undef %v1 = select i1 undef, i8 undef, i8 undef diff --git a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll index 465611d..5ef869e 100644 --- a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s + ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf" define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'direct' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 @@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'dn3216' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r3 = shl <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index a17bbab..0fd1456 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -1,59 +1,59 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=CHECK-MVE +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s --check-prefix=CHECK-NEON target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define void @broadcast() { ; CHECK-MVE-LABEL: 'broadcast' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:80 CodeSize:40 Lat:80 SizeLat:80 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:10 SizeLat:10 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'broadcast' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 8 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 14 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 26 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 50 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer @@ -90,58 +90,58 @@ define void @broadcast() { ;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon) define void @reverse() { ; CHECK-MVE-LABEL: 'reverse' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'reverse' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 10 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v3i24 = shufflevector <3 x i24> undef, <3 x i24> undef, <3 x i32> <i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 18 for: %v3i124 = shufflevector <3 x i124> undef, <3 x i124> undef, <3 x i32> <i32 2, i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0> %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -179,40 +179,40 @@ define void @reverse() { define void @concat() { ; CHECK-MVE-LABEL: 'concat' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'concat' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 12 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 12 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 12 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 12 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 10 for: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 10 for: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -241,54 +241,54 @@ define void @concat() { define void @select() { ; CHECK-MVE-LABEL: 'select' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'select' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 6 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 32 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 16 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 32 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 10 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of 4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> +; CHECK-NEON-NEXT: Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEON-NEXT: Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3> %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7> @@ -324,40 +324,40 @@ define void @select() { define void @vrev2() { ; CHECK-MVE-LABEL: 'vrev2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'vrev2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-NEON-NEXT: Cost Model: Found costs of 96 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-NEON-NEXT: Cost Model: Found costs of 96 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-NEON-NEXT: Cost Model: Found costs of 24 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-NEON-NEXT: Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> +; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> +; CHECK-NEON-NEXT: Cost Model: Found costs of 8 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> @@ -386,26 +386,26 @@ define void @vrev2() { define void @vrev4() { ; CHECK-MVE-LABEL: 'vrev4' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-NEON-LABEL: 'vrev4' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEON-NEXT: Cost Model: Found costs of 96 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEON-NEXT: Cost Model: Found costs of 96 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-NEON-NEXT: Cost Model: Found costs of 48 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEON-NEXT: Cost Model: Found costs of 80 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> +; CHECK-NEON-NEXT: Cost Model: Found costs of 40 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> diff --git a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll index df26121..924a629 100644 --- a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s +; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s + ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -17,11 +18,11 @@ target triple = "armv7--linux-gnueabihf" define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'direct' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 @@ -36,13 +37,13 @@ define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -59,13 +60,13 @@ define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu1632' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i16>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i16>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 0 for: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 0 for: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %r3, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T416, ptr %loadaddr ; ASM: vldr @@ -82,12 +83,12 @@ define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'ups3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -104,12 +105,12 @@ define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'upu3264' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <2 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <2 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found costs of 1 for: store <2 x i64> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T232, ptr %loadaddr ; ASM: vldr @@ -126,12 +127,12 @@ define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) { ; COST-LABEL: 'dn3216' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1 -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8 -; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v0 = load <4 x i32>, ptr %loadaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %v1 = load <4 x i32>, ptr %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found costs of 1 for: %r3 = sub <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found costs of 1 for: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %st, ptr %storeaddr, align 8 +; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v0 = load %T432, ptr %loadaddr ; ASM: vld1.64 diff --git a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll deleted file mode 100644 index 2d6b318..0000000 --- a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll +++ /dev/null @@ -1,41 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-RECIP -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-LAT -; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-THUMB2-SIZE - -target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - -define void @intrinsics() { -; CHECK-THUMB2-RECIP-LABEL: 'intrinsics' -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-THUMB2-LAT-LABEL: 'intrinsics' -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; -; CHECK-THUMB2-SIZE-LABEL: 'intrinsics' -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void -; - %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) - %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef) - %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) - %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) - ret void -} - -declare i32 @llvm.arm.ssat(i32, i32) -declare { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr) -declare { i32, i32 } @llvm.arm.mve.sqrshrl(i32, i32, i32, i32) -declare { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32, i32, i32, i32, i32, <8 x i16>, <8 x i16>) diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll new file mode 100644 index 0000000..0f85478 --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll @@ -0,0 +1,321 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9 +; RUN: opt < %s -mcpu=pwr9 -mtriple=powerpc64-ibm-aix -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9BE +; RUN: opt < %s -mcpu=pwr9 -mtriple=powerpc-ibm-aix -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P932 +; RUN: opt < %s -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10 +; RUN: opt < %s -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10BE +; RUN: opt < %s -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P1032 +; RUN: opt < %s -mcpu=future -mtriple=powerpc64le-unknown-linux-gnu -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE +; RUN: opt < %s -mcpu=future -mtriple=powerpc64-ibm-aix -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTUREBE +; RUN: opt < %s -mcpu=future -mtriple=powerpc-ibm-aix -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE32 + +define void @bar(ptr %base, <2 x i8> %val) { +; P9-LABEL: 'bar' +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P9-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P9-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P9-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; P9BE-LABEL: 'bar' +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P9BE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P9BE-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; P932-LABEL: 'bar' +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P932-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P932-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P932-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; P10-LABEL: 'bar' +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P10-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; P10BE-LABEL: 'bar' +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P10BE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P10BE-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P10BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; P1032-LABEL: 'bar' +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; P1032-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; P1032-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; P1032-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; FUTURE-LABEL: 'bar' +; FUTURE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; FUTURE-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTURE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; FUTUREBE-LABEL: 'bar' +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; FUTUREBE-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTUREBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; FUTURE32-LABEL: 'bar' +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i8> %val) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i16> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i32> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr align 1 %base, <2 x i1> splat (i1 true), <2 x i64> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr align 1 %base, <2 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr align 1 %base, <3 x i1> splat (i1 true), <3 x i64> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr align 1 %base, <3 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr align 1 %base, <4 x i1> splat (i1 true), <4 x i15> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr align 1 %base, <4 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i16.p0(<2 x i16> %x8, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> splat (i1 true), <2 x i8> zeroinitializer) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> splat (i1 true)) +; FUTURE32-NEXT: Cost Model: Invalid cost for instruction: %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> splat (i1 true), i32 1) +; FUTURE32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val) + call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>) + %x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i16> <i16 0, i16 0>) + call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>) + %x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i32> <i32 0, i32 0>) + call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>) + %x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i64> <i64 0, i64 0>) + call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>) + %x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>, <3 x i64> <i64 0, i64 0, i64 0>) + call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>) + %x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i15> <i15 0, i15 0, i15 0, i15 0>) + call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>) + %x7 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v2i8.p0(<2 x i8> %x7, ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + %x8 = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v2i6.p0(<2 x i16> %x8, ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + %x9 = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v2i32.p0(<2 x i32> %x9, ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + %x10 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v2i64.p0(<2 x i64> %x10, ptr %base, <2 x i1> <i1 1, i1 1>, i32 1) + %x11 = call <3 x i64> @llvm.vp.load.v3i64.p0(ptr %base, <3 x i1> <i1 1, i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v3i64.p0(<3 x i64> %x11, ptr %base, <3 x i1> <i1 1, i1 1, i1 1>, i32 1) + %x12 = call <4 x i15> @llvm.vp.load.v4i15.p0(ptr %base, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, i32 1) + call void @llvm.vp.store.v4i15.p0(<4 x i15> %x12, ptr %base, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, i32 1) + %x13 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> <i1 1, i1 1>, <2 x i8> <i8 0, i8 0>) + call void @llvm.masked.compressstore.v2i8(<2 x i8> %x13, ptr %base, <2 x i1> <i1 1, i1 1>) + %x14 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.i64(ptr %base, i64 1, <2 x i1> <i1 1, i1 1>, i32 1) + call void @llvm.experimental.vp.strided.store.v2i8.i64(<2 x i8> %x14, ptr %base, i64 1, <2 x i1> <i1 1, i1 1>, i32 1) + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/abs.ll b/llvm/test/Analysis/CostModel/RISCV/abs.ll index b1f93f3..80dd006 100644 --- a/llvm/test/Analysis/CostModel/RISCV/abs.ll +++ b/llvm/test/Analysis/CostModel/RISCV/abs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+experimental-zvabd | FileCheck %s --check-prefix=ZVABD ; Check that we don't crash querying costs when vectors are not enabled. ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 @@ -39,6 +40,41 @@ define i32 @abs(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %32 = call <vscale x 64 x i8> @llvm.abs.nxv64i8(<vscale x 64 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; ZVABD-LABEL: 'abs' +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call <vscale x 16 x i32> @llvm.abs.nxv16i32(<vscale x 16 x i32> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 2 x i16> @llvm.abs.nxv2i16(<vscale x 2 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %24 = call <vscale x 32 x i16> @llvm.abs.nxv32i16(<vscale x 32 x i16> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %28 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call <vscale x 32 x i8> @llvm.abs.nxv32i8(<vscale x 32 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %32 = call <vscale x 64 x i8> @llvm.abs.nxv64i8(<vscale x 64 x i8> undef, i1 false) +; ZVABD-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index 673bf38..03bdd43 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -1248,35 +1248,35 @@ define void @fdiv_f16() { define void @frem() { ; CHECK-LABEL: 'frem' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F32 = frem <1 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <2 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = frem <4 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8F32 = frem <8 x float> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16F32 = frem <16 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32 = frem <vscale x 2 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32 = frem <vscale x 4 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F64 = frem <1 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <2 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4F64 = frem <4 x double> poison, poison +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8F64 = frem <8 x double> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> poison, poison ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> poison, poison -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8F32_VP = call <8 x float> @llvm.vp.frem.v8f32(<8 x float> poison, <8 x float> poison, <8 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16F32_VP = call <16 x float> @llvm.vp.frem.v16f32(<16 x float> poison, <16 x float> poison, <16 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1F64_VP = call <1 x double> @llvm.vp.frem.v1f64(<1 x double> poison, <1 x double> poison, <1 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison) @@ -1340,24 +1340,24 @@ define void @frem() { define void @frem_bf16() { ; ZVFH-LABEL: 'frem_bf16' -; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFH-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison -; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; ZVFH-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFH-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) ; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) @@ -1366,24 +1366,24 @@ define void @frem_bf16() { ; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; ZVFHMIN-LABEL: 'frem_bf16' -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) ; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) @@ -1392,24 +1392,24 @@ define void @frem_bf16() { ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; NO-ZFHMIN-LABEL: 'frem_bf16' -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %BF16 = frem bfloat poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison) @@ -1502,24 +1502,24 @@ define void @frem_f16() { ; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; NO-ZFHMIN-LABEL: 'frem_f16' -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F16 = frem half poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = frem <1 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16 = frem <2 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F16 = frem <4 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F16 = frem <8 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = frem <16 x half> poison, poison +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32F16 = frem <32 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) -; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison) +; NO-ZFHMIN-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison) ; NO-ZFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison) diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll index 5afc199..7e9a1909 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll @@ -705,72 +705,72 @@ define i32 @udiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = udiv <1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = udiv <2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = udiv <4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = udiv <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = udiv <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = udiv <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = udiv <vscale x 1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = udiv <vscale x 2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = udiv <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = udiv <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = udiv <vscale x 16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = udiv <vscale x 32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = udiv <1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = udiv <2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = udiv <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = udiv <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = udiv <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = udiv <vscale x 1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = udiv <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = udiv <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = udiv <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = udiv <vscale x 16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = udiv <1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = udiv <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = udiv <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = udiv <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = udiv <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = udiv <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = udiv <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = udiv <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = udiv <1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = udiv <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = udiv <4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = udiv <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = udiv <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = udiv <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = udiv <vscale x 1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = udiv <vscale x 2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I16 = udiv <vscale x 4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I16 = udiv <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I16 = udiv <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV32I16 = udiv <vscale x 32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = udiv <1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = udiv <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = udiv <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = udiv <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = udiv <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = udiv <vscale x 1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I32 = udiv <vscale x 2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I32 = udiv <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I32 = udiv <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I32 = udiv <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = udiv <1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = udiv <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = udiv <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = udiv <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I64 = udiv <vscale x 1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I64 = udiv <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I64 = udiv <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I64 = udiv <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIFIVE-X280-LABEL: 'udiv' -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = udiv <1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = udiv <2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = udiv <4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = udiv <8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = udiv <16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = udiv <32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = udiv <vscale x 1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = udiv <vscale x 2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I16 = udiv <vscale x 4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I16 = udiv <vscale x 8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I16 = udiv <vscale x 16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32I16 = udiv <vscale x 32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = udiv <1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = udiv <2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = udiv <4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = udiv <8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = udiv <16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = udiv <vscale x 1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I32 = udiv <vscale x 2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I32 = udiv <vscale x 4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I32 = udiv <vscale x 8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I32 = udiv <vscale x 16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = udiv <1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = udiv <2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = udiv <4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = udiv <8 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1I64 = udiv <vscale x 1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I64 = udiv <vscale x 2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I64 = udiv <vscale x 4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I64 = udiv <vscale x 8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = udiv <1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = udiv <2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = udiv <4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = udiv <8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = udiv <16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = udiv <32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = udiv <vscale x 1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = udiv <vscale x 2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I16 = udiv <vscale x 4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I16 = udiv <vscale x 8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I16 = udiv <vscale x 16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV32I16 = udiv <vscale x 32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = udiv <1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = udiv <2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = udiv <4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = udiv <8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = udiv <16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = udiv <vscale x 1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I32 = udiv <vscale x 2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I32 = udiv <vscale x 4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I32 = udiv <vscale x 8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV16I32 = udiv <vscale x 16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = udiv <1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = udiv <2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = udiv <4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = udiv <8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV1I64 = udiv <vscale x 1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV2I64 = udiv <vscale x 2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV4I64 = udiv <vscale x 4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV8I64 = udiv <vscale x 8 x i64> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = udiv i16 undef, undef @@ -821,72 +821,72 @@ define i32 @urem() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = urem <1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = urem <2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = urem <4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = urem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = urem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = urem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = urem <vscale x 1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = urem <vscale x 2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = urem <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = urem <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = urem <vscale x 16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = urem <vscale x 32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = urem <1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = urem <2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = urem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = urem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = urem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = urem <vscale x 1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = urem <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = urem <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = urem <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = urem <vscale x 16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = urem <1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = urem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = urem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = urem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = urem <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = urem <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = urem <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = urem <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = urem <1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = urem <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = urem <4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = urem <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = urem <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = urem <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = urem <vscale x 1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = urem <vscale x 2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I16 = urem <vscale x 4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I16 = urem <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I16 = urem <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV32I16 = urem <vscale x 32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = urem <1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = urem <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = urem <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = urem <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = urem <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = urem <vscale x 1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I32 = urem <vscale x 2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I32 = urem <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I32 = urem <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I32 = urem <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = urem <1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = urem <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = urem <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = urem <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I64 = urem <vscale x 1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I64 = urem <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I64 = urem <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I64 = urem <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIFIVE-X280-LABEL: 'urem' -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = urem <1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = urem <2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = urem <4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = urem <8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = urem <16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = urem <32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = urem <vscale x 1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = urem <vscale x 2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I16 = urem <vscale x 4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I16 = urem <vscale x 8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I16 = urem <vscale x 16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32I16 = urem <vscale x 32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = urem <1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = urem <2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = urem <4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = urem <8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = urem <16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = urem <vscale x 1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I32 = urem <vscale x 2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I32 = urem <vscale x 4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I32 = urem <vscale x 8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I32 = urem <vscale x 16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = urem <1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = urem <2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = urem <4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = urem <8 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1I64 = urem <vscale x 1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I64 = urem <vscale x 2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I64 = urem <vscale x 4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I64 = urem <vscale x 8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = urem <1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = urem <2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = urem <4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = urem <8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = urem <16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = urem <32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = urem <vscale x 1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = urem <vscale x 2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I16 = urem <vscale x 4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I16 = urem <vscale x 8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I16 = urem <vscale x 16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV32I16 = urem <vscale x 32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = urem <1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = urem <2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = urem <4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = urem <8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = urem <16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = urem <vscale x 1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I32 = urem <vscale x 2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I32 = urem <vscale x 4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I32 = urem <vscale x 8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV16I32 = urem <vscale x 16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = urem <1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = urem <2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = urem <4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = urem <8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV1I64 = urem <vscale x 1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV2I64 = urem <vscale x 2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV4I64 = urem <vscale x 4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV8I64 = urem <vscale x 8 x i64> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = urem i16 undef, undef @@ -937,72 +937,72 @@ define i32 @sdiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = sdiv <1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = sdiv <2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = sdiv <4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sdiv <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = sdiv <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = sdiv <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = sdiv <vscale x 1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = sdiv <vscale x 2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = sdiv <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = sdiv <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = sdiv <vscale x 16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = sdiv <vscale x 32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = sdiv <1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = sdiv <2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sdiv <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = sdiv <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = sdiv <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = sdiv <vscale x 1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = sdiv <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = sdiv <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = sdiv <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = sdiv <vscale x 16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = sdiv <1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sdiv <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = sdiv <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = sdiv <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = sdiv <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = sdiv <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = sdiv <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = sdiv <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = sdiv <1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = sdiv <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = sdiv <4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = sdiv <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = sdiv <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = sdiv <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = sdiv <vscale x 1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = sdiv <vscale x 2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I16 = sdiv <vscale x 4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I16 = sdiv <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I16 = sdiv <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV32I16 = sdiv <vscale x 32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = sdiv <1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = sdiv <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = sdiv <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = sdiv <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = sdiv <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = sdiv <vscale x 1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I32 = sdiv <vscale x 2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I32 = sdiv <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I32 = sdiv <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I32 = sdiv <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = sdiv <1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = sdiv <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = sdiv <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = sdiv <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I64 = sdiv <vscale x 1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I64 = sdiv <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I64 = sdiv <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I64 = sdiv <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIFIVE-X280-LABEL: 'sdiv' -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = sdiv <1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = sdiv <2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = sdiv <4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sdiv <8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = sdiv <16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = sdiv <32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = sdiv <vscale x 1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = sdiv <vscale x 2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I16 = sdiv <vscale x 4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I16 = sdiv <vscale x 8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I16 = sdiv <vscale x 16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32I16 = sdiv <vscale x 32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = sdiv <1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = sdiv <2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sdiv <4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = sdiv <8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = sdiv <16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = sdiv <vscale x 1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I32 = sdiv <vscale x 2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I32 = sdiv <vscale x 4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I32 = sdiv <vscale x 8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I32 = sdiv <vscale x 16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = sdiv <1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sdiv <2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = sdiv <4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = sdiv <8 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1I64 = sdiv <vscale x 1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I64 = sdiv <vscale x 2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I64 = sdiv <vscale x 4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I64 = sdiv <vscale x 8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = sdiv <1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = sdiv <2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = sdiv <4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = sdiv <8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = sdiv <16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = sdiv <32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = sdiv <vscale x 1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = sdiv <vscale x 2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I16 = sdiv <vscale x 4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I16 = sdiv <vscale x 8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I16 = sdiv <vscale x 16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV32I16 = sdiv <vscale x 32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = sdiv <1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = sdiv <2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = sdiv <4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = sdiv <8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = sdiv <16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = sdiv <vscale x 1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I32 = sdiv <vscale x 2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I32 = sdiv <vscale x 4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I32 = sdiv <vscale x 8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV16I32 = sdiv <vscale x 16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = sdiv <1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = sdiv <2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = sdiv <4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = sdiv <8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV1I64 = sdiv <vscale x 1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV2I64 = sdiv <vscale x 2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV4I64 = sdiv <vscale x 4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV8I64 = sdiv <vscale x 8 x i64> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = sdiv i16 undef, undef @@ -1053,72 +1053,72 @@ define i32 @srem() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = srem <1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = srem <2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = srem <4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = srem <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = srem <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = srem <32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = srem <vscale x 1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = srem <vscale x 2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = srem <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = srem <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = srem <vscale x 16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = srem <vscale x 32 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = srem <1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = srem <2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = srem <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = srem <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = srem <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = srem <vscale x 1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = srem <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = srem <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = srem <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = srem <vscale x 16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = srem <1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = srem <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = srem <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = srem <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = srem <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = srem <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = srem <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = srem <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = srem <1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = srem <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = srem <4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = srem <8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = srem <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = srem <32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = srem <vscale x 1 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = srem <vscale x 2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I16 = srem <vscale x 4 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I16 = srem <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I16 = srem <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV32I16 = srem <vscale x 32 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = srem <1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = srem <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = srem <4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = srem <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = srem <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = srem <vscale x 1 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I32 = srem <vscale x 2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I32 = srem <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I32 = srem <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I32 = srem <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = srem <1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = srem <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = srem <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = srem <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I64 = srem <vscale x 1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I64 = srem <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I64 = srem <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I64 = srem <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIFIVE-X280-LABEL: 'srem' -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = srem <1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = srem <2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = srem <4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = srem <8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = srem <16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = srem <32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = srem <vscale x 1 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = srem <vscale x 2 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I16 = srem <vscale x 4 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I16 = srem <vscale x 8 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I16 = srem <vscale x 16 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32I16 = srem <vscale x 32 x i16> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = srem <1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = srem <2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = srem <4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = srem <8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = srem <16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = srem <vscale x 1 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I32 = srem <vscale x 2 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I32 = srem <vscale x 4 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I32 = srem <vscale x 8 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16I32 = srem <vscale x 16 x i32> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = srem <1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = srem <2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = srem <4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = srem <8 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1I64 = srem <vscale x 1 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I64 = srem <vscale x 2 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I64 = srem <vscale x 4 x i64> undef, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I64 = srem <vscale x 8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = srem <1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = srem <2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = srem <4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = srem <8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = srem <16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = srem <32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I16 = srem <vscale x 1 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2I16 = srem <vscale x 2 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4I16 = srem <vscale x 4 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8I16 = srem <vscale x 8 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV16I16 = srem <vscale x 16 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV32I16 = srem <vscale x 32 x i16> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = srem <1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = srem <2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = srem <4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = srem <8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = srem <16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1I32 = srem <vscale x 1 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV2I32 = srem <vscale x 2 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV4I32 = srem <vscale x 4 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV8I32 = srem <vscale x 8 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV16I32 = srem <vscale x 16 x i32> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = srem <1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = srem <2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = srem <4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = srem <8 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV1I64 = srem <vscale x 1 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV2I64 = srem <vscale x 2 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NXV4I64 = srem <vscale x 4 x i64> undef, undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NXV8I64 = srem <vscale x 8 x i64> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = srem i16 undef, undef diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll index e64bce2..6dacd59 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll @@ -6239,3 +6239,13 @@ define void @legalization_crash() { fptoui <192 x float> undef to <192 x i1> ret void } + +; Test that types that need to be split go through BasicTTIImpl. +define void @BitInt_crash() { +; ZVE64X-LABEL: 'BitInt_crash' +; ZVE64X-NEXT: Cost Model: Found an estimated cost of 2043 for instruction: %1 = bitcast <16 x i64> poison to <512 x i2> +; ZVE64X-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + bitcast <16 x i64> poison to <512 x i2> + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/cmp-select.ll b/llvm/test/Analysis/CostModel/RISCV/cmp-select.ll index dc0810b..58848cc 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cmp-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cmp-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+f,+short-forward-branch-opt -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=SFB64 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+f,+short-forward-branch-ialu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=SFB64 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+f -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=RV64 define i32 @icmp-iselect(i64 %ca, i64 %cb, i32 %a, i32 %b) { diff --git a/llvm/test/Analysis/CostModel/RISCV/constant-pool.ll b/llvm/test/Analysis/CostModel/RISCV/constant-pool.ll new file mode 100644 index 0000000..1bf646f --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/constant-pool.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mcpu=sifive-x390 < %s | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mcpu=sifive-p670 < %s | FileCheck %s --check-prefix=CHECK_FUSION +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mcpu=sifive-x390 -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mcpu=sifive-p670 -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE_FUSION + +; Storing an immediate vector implicity calls RISCVTTIImpl::getConstantPoolLoadCost() +define void @rvv_store_imm(ptr %X) { +; CHECK-LABEL: 'rvv_store_imm' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> <i8 2, i8 3>, ptr %X, align 2 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> <i8 2, i8 3, i8 5, i8 10>, ptr %X, align 4 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8> <i8 2, i8 3, i8 5, i8 10, i8 11, i8 12, i8 23, i8 0>, ptr %X, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK_FUSION-LABEL: 'rvv_store_imm' +; CHECK_FUSION-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <2 x i8> <i8 2, i8 3>, ptr %X, align 2 +; CHECK_FUSION-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i8> <i8 2, i8 3, i8 5, i8 10>, ptr %X, align 4 +; CHECK_FUSION-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <8 x i8> <i8 2, i8 3, i8 5, i8 10, i8 11, i8 12, i8 23, i8 0>, ptr %X, align 8 +; CHECK_FUSION-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'rvv_store_imm' +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> <i8 2, i8 3>, ptr %X, align 2 +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> <i8 2, i8 3, i8 5, i8 10>, ptr %X, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8> <i8 2, i8 3, i8 5, i8 10, i8 11, i8 12, i8 23, i8 0>, ptr %X, align 8 +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_FUSION-LABEL: 'rvv_store_imm' +; SIZE_FUSION-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> <i8 2, i8 3>, ptr %X, align 2 +; SIZE_FUSION-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> <i8 2, i8 3, i8 5, i8 10>, ptr %X, align 4 +; SIZE_FUSION-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8> <i8 2, i8 3, i8 5, i8 10, i8 11, i8 12, i8 23, i8 0>, ptr %X, align 8 +; SIZE_FUSION-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + store <2 x i8> <i8 2, i8 3>, ptr %X + store <4 x i8> <i8 2, i8 3, i8 5, i8 10>, ptr %X + store <8 x i8> <i8 2, i8 3, i8 5, i8 10, i8 11, i8 12, i8 23, i8 0> , ptr %X + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/fshl_fshr.ll b/llvm/test/Analysis/CostModel/RISCV/fshl_fshr.ll new file mode 100644 index 0000000..04e6745 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/fshl_fshr.ll @@ -0,0 +1,181 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv32 < %s | FileCheck %s --check-prefix=RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv64 < %s | FileCheck %s --check-prefix=RV64 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv32 -mattr=+zbb < %s | FileCheck %s --check-prefix=RV32ZBB +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=riscv64 -mattr=+zbb < %s | FileCheck %s --check-prefix=RV64ZBB + +define i32 @rotl_i32_3rd_arg_const(i32 %a) { +; RV32-LABEL: 'rotl_i32_3rd_arg_const' +; RV32-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64-LABEL: 'rotl_i32_3rd_arg_const' +; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV32ZBB-LABEL: 'rotl_i32_3rd_arg_const' +; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64ZBB-LABEL: 'rotl_i32_3rd_arg_const' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) { +; RV32-LABEL: 'rotl_i32_3rd_arg_var' +; RV32-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64-LABEL: 'rotl_i32_3rd_arg_var' +; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV32ZBB-LABEL: 'rotl_i32_3rd_arg_var' +; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64ZBB-LABEL: 'rotl_i32_3rd_arg_var' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotl_i64_3rd_arg_const(i64 %a) { +; RV32-LABEL: 'rotl_i64_3rd_arg_const' +; RV32-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64-LABEL: 'rotl_i64_3rd_arg_const' +; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV32ZBB-LABEL: 'rotl_i64_3rd_arg_const' +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64ZBB-LABEL: 'rotl_i64_3rd_arg_const' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) { +; RV32-LABEL: 'rotl_i64_3rd_arg_var' +; RV32-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64-LABEL: 'rotl_i64_3rd_arg_var' +; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV32ZBB-LABEL: 'rotl_i64_3rd_arg_var' +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64ZBB-LABEL: 'rotl_i64_3rd_arg_var' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r +} + +define i32 @rotr_i32_3rd_arg_const(i32 %a) { +; RV32-LABEL: 'rotr_i32_3rd_arg_const' +; RV32-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64-LABEL: 'rotr_i32_3rd_arg_const' +; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV32ZBB-LABEL: 'rotr_i32_3rd_arg_const' +; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64ZBB-LABEL: 'rotr_i32_3rd_arg_const' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9) + ret i32 %r +} + +define i32 @rotr_i32_3rd_arg_var(i32 %a, i32 %c) { +; RV32-LABEL: 'rotr_i32_3rd_arg_var' +; RV32-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64-LABEL: 'rotr_i32_3rd_arg_var' +; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV32ZBB-LABEL: 'rotr_i32_3rd_arg_var' +; RV32ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +; RV64ZBB-LABEL: 'rotr_i32_3rd_arg_var' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r +; +entry: + %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c) + ret i32 %r +} + +define i64 @rotr_i64_3rd_arg_const(i64 %a) { +; RV32-LABEL: 'rotr_i64_3rd_arg_const' +; RV32-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64-LABEL: 'rotr_i64_3rd_arg_const' +; RV64-NEXT: Cost Model: Found costs of 4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV32ZBB-LABEL: 'rotr_i64_3rd_arg_const' +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64ZBB-LABEL: 'rotr_i64_3rd_arg_const' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9) + ret i64 %r +} + +define i64 @rotr_i64_3rd_arg_var(i64 %a, i64 %c) { +; RV32-LABEL: 'rotr_i64_3rd_arg_var' +; RV32-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; RV32-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64-LABEL: 'rotr_i64_3rd_arg_var' +; RV64-NEXT: Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; RV64-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV32ZBB-LABEL: 'rotr_i64_3rd_arg_var' +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; RV32ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +; RV64ZBB-LABEL: 'rotr_i64_3rd_arg_var' +; RV64ZBB-NEXT: Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) +; RV64ZBB-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r +; +entry: + %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c) + ret i64 %r +} diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll index e3305c0..5830a35 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -148,55 +148,38 @@ define void @vector_reverse() { } define void @vector_splice() { -; ARGBASED-LABEL: 'vector_splice' -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; ARGBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; TYPEBASED-LABEL: 'vector_splice' -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-LABEL: 'vector_splice' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %7 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %8 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <vscale x 16 x i1> @llvm.vector.splice.left.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <vscale x 8 x i1> @llvm.vector.splice.left.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 4 x i1> @llvm.vector.splice.left.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <vscale x 2 x i1> @llvm.vector.splice.left.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'vector_splice' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <vscale x 16 x i1> @llvm.vector.splice.left.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <vscale x 8 x i1> @llvm.vector.splice.left.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 4 x i1> @llvm.vector.splice.left.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <vscale x 2 x i1> @llvm.vector.splice.left.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll new file mode 100644 index 0000000..20749d0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV64 + +define void @masked_gather() { +; RV32-LABEL: 'masked_gather' +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64-LABEL: 'masked_gather' +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) + %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) + %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) + %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) + + %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) + %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) + %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) + %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) + %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) + + %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) + %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) + %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) + %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) + + %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) + %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) + %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) + %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) + %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) + + %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) + %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) + %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) + %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) + %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) + %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) + + %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) + %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) + %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) + %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) + %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) + %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) + %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll index 997ec12..c60c372 100644 --- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll @@ -1,86 +1,45 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED define void @masked_gather_aligned() { -; GENERIC-LABEL: 'masked_gather_aligned' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; MAX256-LABEL: 'masked_gather_aligned' -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SUPPORTED-LABEL: 'masked_gather_aligned' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_gather_aligned' ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) @@ -114,10 +73,6 @@ define void @masked_gather_aligned() { ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef) ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) ; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) @@ -157,32 +112,18 @@ define void @masked_gather_aligned() { %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef) %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef) - - %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) - %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) - %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) - %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) ret void } define void @masked_gather_aligned_f16() { -; GENERIC-LABEL: 'masked_gather_aligned_f16' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; MAX256-LABEL: 'masked_gather_aligned_f16' -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x half> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SUPPORTED-LABEL: 'masked_gather_aligned_f16' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x half> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_gather_aligned_f16' ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef) @@ -229,10 +170,6 @@ define void @masked_gather_unaligned() { ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef) @@ -264,11 +201,6 @@ define void @masked_gather_unaligned() { %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef) %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef) - %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) - %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) - %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) - %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) - ret void } @@ -291,3 +223,54 @@ define void @masked_gather_unaligned_f16() { ret void } + +define void @masked_gather_ptr_align4() { +; RV32-LABEL: 'masked_gather_ptr_align4' +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64-LABEL: 'masked_gather_ptr_align4' +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; UNSUPPORTED-LABEL: 'masked_gather_ptr_align4' +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) + %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) + %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) + %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) + ret void +} + +define void @masked_gather_ptr_align8() { +; SUPPORTED-LABEL: 'masked_gather_ptr_align8' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; UNSUPPORTED-LABEL: 'masked_gather_ptr_align8' +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) +; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef) + %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef) + %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef) + %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef) + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll new file mode 100644 index 0000000..982c227 --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV64 + +define void @masked_scatter() { +; RV32-LABEL: 'masked_scatter' +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64-LABEL: 'masked_scatter' +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef) + call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef) + call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef) + call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef) + call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef) + call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef) + call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef) + call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef) + + call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll index 69abcde..6b8188a 100644 --- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll @@ -1,86 +1,45 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64 +; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED -; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED define void @masked_scatter_aligned() { -; GENERIC-LABEL: 'masked_scatter_aligned' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; MAX256-LABEL: 'masked_scatter_aligned' -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SUPPORTED-LABEL: 'masked_scatter_aligned' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_scatter_aligned' ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) @@ -114,10 +73,6 @@ define void @masked_scatter_aligned() { ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) -; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) ; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) @@ -157,32 +112,20 @@ define void @masked_scatter_aligned() { call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef) call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef) - call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef) - call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef) - call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef) - call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef) - ret void } define void @masked_scatter_aligned_f16() { -; GENERIC-LABEL: 'masked_scatter_aligned_f16' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; MAX256-LABEL: 'masked_scatter_aligned_f16' -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SUPPORTED-LABEL: 'masked_scatter_aligned_f16' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_scatter_aligned_f16' ; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef) @@ -229,10 +172,6 @@ define void @masked_scatter_unaligned() { ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef) @@ -264,11 +203,6 @@ define void @masked_scatter_unaligned() { call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef) call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef) - call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef) - call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef) - call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef) - call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef) - ret void } @@ -291,3 +225,56 @@ define void @masked_scatter_unaligned_f16() { ret void } + +define void @masked_scatter_ptr_align4() { +; RV32-LABEL: 'masked_scatter_ptr_align4' +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64-LABEL: 'masked_scatter_ptr_align4' +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; RV64-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; UNSUPPORTED-LABEL: 'masked_scatter_ptr_align4' +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef) + + ret void +} + +define void @masked_scatter_ptr_align8() { +; SUPPORTED-LABEL: 'masked_scatter_ptr_align8' +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; SUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; UNSUPPORTED-LABEL: 'masked_scatter_ptr_align8' +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) +; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef) + call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef) + call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef) + call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/splice.ll b/llvm/test/Analysis/CostModel/RISCV/splice.ll index ddfaa8c..0dad42c 100644 --- a/llvm/test/Analysis/CostModel/RISCV/splice.ll +++ b/llvm/test/Analysis/CostModel/RISCV/splice.ll @@ -6,121 +6,121 @@ define void @vector_splice() { ; CHECK-LABEL: 'vector_splice' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <vscale x 1 x i8> @llvm.vector.splice.right.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <vscale x 2 x i8> @llvm.vector.splice.right.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <vscale x 4 x i8> @llvm.vector.splice.right.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 8 x i8> @llvm.vector.splice.right.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 64 x i8> @llvm.vector.splice.right.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %12 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %13 = call <vscale x 32 x i16> @llvm.vector.splice.right.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %14 = call <vscale x 64 x i16> @llvm.vector.splice.right.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <vscale x 1 x i32> @llvm.vector.splice.right.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 2 x i32> @llvm.vector.splice.right.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %18 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %19 = call <vscale x 16 x i32> @llvm.vector.splice.right.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %20 = call <vscale x 32 x i32> @llvm.vector.splice.right.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %21 = call <vscale x 64 x i32> @llvm.vector.splice.right.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %24 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %25 = call <vscale x 8 x i64> @llvm.vector.splice.right.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %26 = call <vscale x 16 x i64> @llvm.vector.splice.right.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %27 = call <vscale x 32 x i64> @llvm.vector.splice.right.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %28 = call <vscale x 64 x i64> @llvm.vector.splice.right.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %33 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %34 = call <vscale x 32 x bfloat> @llvm.vector.splice.right.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %35 = call <vscale x 64 x bfloat> @llvm.vector.splice.right.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %40 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %41 = call <vscale x 32 x half> @llvm.vector.splice.right.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %42 = call <vscale x 64 x half> @llvm.vector.splice.right.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %43 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %46 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %47 = call <vscale x 16 x float> @llvm.vector.splice.right.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %48 = call <vscale x 32 x float> @llvm.vector.splice.right.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %49 = call <vscale x 64 x float> @llvm.vector.splice.right.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %53 = call <vscale x 8 x double> @llvm.vector.splice.right.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %54 = call <vscale x 16 x double> @llvm.vector.splice.right.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %55 = call <vscale x 32 x double> @llvm.vector.splice.right.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %56 = call <vscale x 64 x double> @llvm.vector.splice.right.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'vector_splice' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 -1) -; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 -1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <vscale x 1 x i8> @llvm.vector.splice.right.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <vscale x 2 x i8> @llvm.vector.splice.right.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <vscale x 4 x i8> @llvm.vector.splice.right.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <vscale x 8 x i8> @llvm.vector.splice.right.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 64 x i8> @llvm.vector.splice.right.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 32 x i16> @llvm.vector.splice.right.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = call <vscale x 64 x i16> @llvm.vector.splice.right.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <vscale x 1 x i32> @llvm.vector.splice.right.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 2 x i32> @llvm.vector.splice.right.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 16 x i32> @llvm.vector.splice.right.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 32 x i32> @llvm.vector.splice.right.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call <vscale x 64 x i32> @llvm.vector.splice.right.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <vscale x 8 x i64> @llvm.vector.splice.right.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call <vscale x 16 x i64> @llvm.vector.splice.right.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = call <vscale x 32 x i64> @llvm.vector.splice.right.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %28 = call <vscale x 64 x i64> @llvm.vector.splice.right.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %29 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %30 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %31 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %32 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %33 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %34 = call <vscale x 32 x bfloat> @llvm.vector.splice.right.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %35 = call <vscale x 64 x bfloat> @llvm.vector.splice.right.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = call <vscale x 32 x half> @llvm.vector.splice.right.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %42 = call <vscale x 64 x half> @llvm.vector.splice.right.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %43 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call <vscale x 16 x float> @llvm.vector.splice.right.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call <vscale x 32 x float> @llvm.vector.splice.right.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %49 = call <vscale x 64 x float> @llvm.vector.splice.right.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call <vscale x 8 x double> @llvm.vector.splice.right.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call <vscale x 16 x double> @llvm.vector.splice.right.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call <vscale x 32 x double> @llvm.vector.splice.right.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 1) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call <vscale x 64 x double> @llvm.vector.splice.right.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 1) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 -1) @@ -189,3 +189,375 @@ define void @vector_splice() { ret void } + +define void @vector_splice_left_variable(i32 zeroext %offset) { +; CHECK-LABEL: 'vector_splice_left_variable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.left.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.left.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.left.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.left.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.left.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.left.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.left.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.left.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.left.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.left.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.left.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.left.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.left.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.left.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.left.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.left.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.left.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.left.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.left.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.left.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.left.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.left.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.left.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.left.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.left.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.left.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.left.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.left.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.left.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.left.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.left.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.left.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.left.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_splice_left_variable' +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.left.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.left.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.left.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.left.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.left.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.left.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.left.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.left.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.left.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.left.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.left.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.left.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.left.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.left.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.left.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.left.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.left.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.left.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.left.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.left.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.left.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.left.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.left.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.left.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.left.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.left.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.left.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.left.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.left.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.left.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.left.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.left.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.left.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.left.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) + %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.left.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) + %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.left.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) + %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.left.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) + %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.left.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) + %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.left.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) + %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.left.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) + + %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.left.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) + %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.left.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) + %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.left.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) + %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.left.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) + %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.left.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) + %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.left.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) + %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.left.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) + + %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.left.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) + %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.left.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) + %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.left.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) + %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.left.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) + %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.left.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) + %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.left.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) + %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.left.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) + + %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.left.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) + %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.left.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) + %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.left.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) + %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.left.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) + %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.left.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) + %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.left.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) + %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.left.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) + + %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.left.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.left.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.left.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.left.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.left.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.left.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.left.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) + + %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.left.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) + %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.left.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) + %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.left.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) + %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.left.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) + %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.left.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) + %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.left.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) + %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.left.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) + + %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.left.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) + %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.left.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) + %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.left.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) + %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.left.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) + %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.left.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) + %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.left.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) + %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.left.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) + + %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.left.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) + %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.left.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) + %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.left.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) + %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.left.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) + %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.left.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) + %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.left.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) + %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.left.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) + + ret void +} + +define void @vector_splice_right_variable(i32 zeroext %offset) { +; CHECK-LABEL: 'vector_splice_right_variable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.right.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.right.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.right.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.right.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.right.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.right.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.right.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.right.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.right.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.right.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.right.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.right.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.right.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.right.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.right.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.right.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.right.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.right.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.right.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.right.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.right.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.right.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.right.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.right.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.right.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.right.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.right.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SIZE-LABEL: 'vector_splice_right_variable' +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.right.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.right.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.right.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.right.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.right.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.right.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.right.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.right.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.right.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.right.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.right.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.right.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.right.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.right.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.right.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.right.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.right.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.right.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.right.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.right.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.right.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.right.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.right.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.right.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.right.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.right.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.right.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %splice.nxv1i8 = call <vscale x 1 x i8> @llvm.vector.splice.right.nxv1i8(<vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8> zeroinitializer, i32 %offset) + %splice.nxv2i8 = call <vscale x 2 x i8> @llvm.vector.splice.right.nxv2i8(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> zeroinitializer, i32 %offset) + %splice.nxv4i8 = call <vscale x 4 x i8> @llvm.vector.splice.right.nxv4i8(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> zeroinitializer, i32 %offset) + %splice.nxv8i8 = call <vscale x 8 x i8> @llvm.vector.splice.right.nxv8i8(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> zeroinitializer, i32 %offset) + %splice.nxv16i8 = call <vscale x 16 x i8> @llvm.vector.splice.right.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 %offset) + %splice.nxv32i8 = call <vscale x 32 x i8> @llvm.vector.splice.right.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 %offset) + %splice.nxv64i8 = call <vscale x 64 x i8> @llvm.vector.splice.right.nxv64i8(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer, i32 %offset) + + %splice.nxv1i16 = call <vscale x 1 x i16> @llvm.vector.splice.right.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> zeroinitializer, i32 %offset) + %splice.nxv2i16 = call <vscale x 2 x i16> @llvm.vector.splice.right.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 %offset) + %splice.nxv4i16 = call <vscale x 4 x i16> @llvm.vector.splice.right.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 %offset) + %splice.nxv8i16 = call <vscale x 8 x i16> @llvm.vector.splice.right.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 %offset) + %splice.nxv16i16 = call <vscale x 16 x i16> @llvm.vector.splice.right.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 %offset) + %splice.nxv32i16 = call <vscale x 32 x i16> @llvm.vector.splice.right.nxv32i16(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, i32 %offset) + %splice.nxv64i16 = call <vscale x 64 x i16> @llvm.vector.splice.right.nxv64i16(<vscale x 64 x i16> zeroinitializer, <vscale x 64 x i16> zeroinitializer, i32 %offset) + + %splice.nxv1i32 = call <vscale x 1 x i32> @llvm.vector.splice.right.nxv1i32(<vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, i32 %offset) + %splice.nxv2i32 = call <vscale x 2 x i32> @llvm.vector.splice.right.nxv2i32(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i32 %offset) + %splice.nxv4i32 = call <vscale x 4 x i32> @llvm.vector.splice.right.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 %offset) + %splice.nxv8i32 = call <vscale x 8 x i32> @llvm.vector.splice.right.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 %offset) + %splice.nxv16i32 = call <vscale x 16 x i32> @llvm.vector.splice.right.nxv16i32(<vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> zeroinitializer, i32 %offset) + %splice.nxv32i32 = call <vscale x 32 x i32> @llvm.vector.splice.right.nxv32i32(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 %offset) + %splice.nxv64i32 = call <vscale x 64 x i32> @llvm.vector.splice.right.nxv64i32(<vscale x 64 x i32> zeroinitializer, <vscale x 64 x i32> zeroinitializer, i32 %offset) + + %splice.nxv1i64 = call <vscale x 1 x i64> @llvm.vector.splice.right.nxv1i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i64> zeroinitializer, i32 %offset) + %splice.nxv2i64 = call <vscale x 2 x i64> @llvm.vector.splice.right.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 %offset) + %splice.nxv4i64 = call <vscale x 4 x i64> @llvm.vector.splice.right.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 %offset) + %splice.nxv8i64 = call <vscale x 8 x i64> @llvm.vector.splice.right.nxv8i64(<vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> zeroinitializer, i32 %offset) + %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.right.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 %offset) + %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.right.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 %offset) + %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.right.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 %offset) + + %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.right.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.right.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.right.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.right.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.right.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.right.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 %offset) + %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.right.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 %offset) + + %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.right.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 %offset) + %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.right.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 %offset) + %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.right.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 %offset) + %splice.nxv8f16 = call <vscale x 8 x half> @llvm.vector.splice.right.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 %offset) + %splice.nxv16f16 = call <vscale x 16 x half> @llvm.vector.splice.right.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 %offset) + %splice.nxv32f16 = call <vscale x 32 x half> @llvm.vector.splice.right.nxv32f16(<vscale x 32 x half> zeroinitializer, <vscale x 32 x half> zeroinitializer, i32 %offset) + %splice.nxv64f16 = call <vscale x 64 x half> @llvm.vector.splice.right.nxv64f16(<vscale x 64 x half> zeroinitializer, <vscale x 64 x half> zeroinitializer, i32 %offset) + + %splice.nxv1f32 = call <vscale x 1 x float> @llvm.vector.splice.right.nxv1f32(<vscale x 1 x float> zeroinitializer, <vscale x 1 x float> zeroinitializer, i32 %offset) + %splice.nxv2f32 = call <vscale x 2 x float> @llvm.vector.splice.right.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 %offset) + %splice.nxv4f32 = call <vscale x 4 x float> @llvm.vector.splice.right.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 %offset) + %splice.nxv8f32 = call <vscale x 8 x float> @llvm.vector.splice.right.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 %offset) + %splice.nxv16f32 = call <vscale x 16 x float> @llvm.vector.splice.right.nxv16f32(<vscale x 16 x float> zeroinitializer, <vscale x 16 x float> zeroinitializer, i32 %offset) + %splice.nxv32f32 = call <vscale x 32 x float> @llvm.vector.splice.right.nxv32f32(<vscale x 32 x float> zeroinitializer, <vscale x 32 x float> zeroinitializer, i32 %offset) + %splice.nxv64f32 = call <vscale x 64 x float> @llvm.vector.splice.right.nxv64f32(<vscale x 64 x float> zeroinitializer, <vscale x 64 x float> zeroinitializer, i32 %offset) + + %splice.nxv1f64 = call <vscale x 1 x double> @llvm.vector.splice.right.nxv1f64(<vscale x 1 x double> zeroinitializer, <vscale x 1 x double> zeroinitializer, i32 %offset) + %splice.nxv2f64 = call <vscale x 2 x double> @llvm.vector.splice.right.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 %offset) + %splice.nxv4f64 = call <vscale x 4 x double> @llvm.vector.splice.right.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 %offset) + %splice.nxv8f64 = call <vscale x 8 x double> @llvm.vector.splice.right.nxv8f64(<vscale x 8 x double> zeroinitializer, <vscale x 8 x double> zeroinitializer, i32 %offset) + %splice.nxv16f64 = call <vscale x 16 x double> @llvm.vector.splice.right.nxv16f64(<vscale x 16 x double> zeroinitializer, <vscale x 16 x double> zeroinitializer, i32 %offset) + %splice.nxv32f64 = call <vscale x 32 x double> @llvm.vector.splice.right.nxv32f64(<vscale x 32 x double> zeroinitializer, <vscale x 32 x double> zeroinitializer, i32 %offset) + %splice.nxv64f64 = call <vscale x 64 x double> @llvm.vector.splice.right.nxv64f64(<vscale x 64 x double> zeroinitializer, <vscale x 64 x double> zeroinitializer, i32 %offset) + + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 71746ca..9e8f727 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -836,74 +836,74 @@ define void @abs() { ret void } -define void @load() { +define void @load(ptr %src) { ; CHECK-LABEL: 'load' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef) - %t1 = load <2 x i8>, ptr undef - %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef) - %t3 = load <4 x i8>, ptr undef - %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef) - %t5 = load <8 x i8>, ptr undef - %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef) - %t7 = load <16 x i8>, ptr undef - %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef) - %t9 = load <2 x i64>, ptr undef - %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef) - %t12 = load <4 x i64>, ptr undef - %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef) - %t14 = load <8 x i64>, ptr undef - %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef) - %t16 = load <16 x i64>, ptr undef - %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef) - %t18 = load <vscale x 2 x i8>, ptr undef - %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef) - %t20 = load <vscale x 4 x i8>, ptr undef - %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef) - %t22 = load <vscale x 8 x i8>, ptr undef - %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef) - %t24 = load <vscale x 16 x i8>, ptr undef - %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef) - %t26 = load <vscale x 2 x i64>, ptr undef - %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef) - %t28 = load <vscale x 4 x i64>, ptr undef - %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef) - %t30 = load <vscale x 8 x i64>, ptr undef - %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef) - %t32 = load <vscale x 16 x i64>, ptr undef + %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef) + %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef) + %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef) + %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef) + %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef) + %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef) + %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef) + %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef) + %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef) + %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef) + %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef) + %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef) + %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef) + %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef) + %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef) + %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef) + %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef) + %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef) + %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef) + %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef) + %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef) + %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef) + %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef) + %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef) + %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef) + %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef) + %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef) + %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef) + %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef) + %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef) + %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef) + %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef) ret void } @@ -1579,157 +1579,6 @@ define void @vp_fdiv(){ ret void } -define void @splat() { -; CHECK-LABEL: 'splat' -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.splat.nxv2i8(i8 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.splat.nxv4i8(i8 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.splat.nxv8i8(i8 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.splat.nxv16i8(i8 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.splat.nxv2i16(i16 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.splat.nxv4i16(i16 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.splat.nxv8i16(i16 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.splat.nxv16i16(i16 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.splat.nxv8i32(i32 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.splat.nxv16i32(i32 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.splat.nxv4i64(i64 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.splat.nxv8i64(i64 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.splat.nxv16i64(i64 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.splat.nxv2f16(half undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.splat.nxv4f16(half undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.splat.nxv8f16(half undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.splat.nxv16f16(half undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.splat.nxv2f32(float undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.splat.nxv4f32(float undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.splat.nxv8f32(float undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.splat.nxv16f32(float undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.splat.nxv2f64(double undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.splat.nxv4f64(double undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.splat.nxv8f64(double undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.splat.nxv16f64(double undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) - call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) - call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) - call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) - call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) - call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) - call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) - call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) - call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) - call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) - call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) - call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) - call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) - call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) - call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) - call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) - call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) - call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) - call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) - call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) - call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) - call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) - call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) - call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) - call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) - call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) - call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) - call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) - call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) - call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) - call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) - call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) - call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) - call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) - call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) - call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) - call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x i8> @llvm.experimental.vp.splat.nxv2i8(i8 undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x i8> @llvm.experimental.vp.splat.nxv4i8(i8 undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x i8> @llvm.experimental.vp.splat.nxv8i8(i8 undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x i8> @llvm.experimental.vp.splat.nxv16i8(i8 undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x i16> @llvm.experimental.vp.splat.nxv2i16(i16 undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x i16> @llvm.experimental.vp.splat.nxv4i16(i16 undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x i16> @llvm.experimental.vp.splat.nxv8i16(i16 undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x i16> @llvm.experimental.vp.splat.nxv16i16(i16 undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x i32> @llvm.experimental.vp.splat.nxv8i32(i32 undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x i32> @llvm.experimental.vp.splat.nxv16i32(i32 undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x i64> @llvm.experimental.vp.splat.nxv4i64(i64 undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x i64> @llvm.experimental.vp.splat.nxv8i64(i64 undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x i64> @llvm.experimental.vp.splat.nxv16i64(i64 undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x bfloat> @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x bfloat> @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x bfloat> @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x bfloat> @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x half> @llvm.experimental.vp.splat.nxv2f16(half undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x half> @llvm.experimental.vp.splat.nxv4f16(half undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x half> @llvm.experimental.vp.splat.nxv8f16(half undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x half> @llvm.experimental.vp.splat.nxv16f16(half undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x float> @llvm.experimental.vp.splat.nxv2f32(float undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x float> @llvm.experimental.vp.splat.nxv4f32(float undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x float> @llvm.experimental.vp.splat.nxv8f32(float undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x float> @llvm.experimental.vp.splat.nxv16f32(float undef, <vscale x 16 x i1> undef, i32 undef) - call <vscale x 2 x double> @llvm.experimental.vp.splat.nxv2f64(double undef, <vscale x 2 x i1> undef, i32 undef) - call <vscale x 4 x double> @llvm.experimental.vp.splat.nxv4f64(double undef, <vscale x 4 x i1> undef, i32 undef) - call <vscale x 8 x double> @llvm.experimental.vp.splat.nxv8f64(double undef, <vscale x 8 x i1> undef, i32 undef) - call <vscale x 16 x double> @llvm.experimental.vp.splat.nxv16f64(double undef, <vscale x 16 x i1> undef, i32 undef) - ret void -} - define void @splice() { ; CHECK-LABEL: 'splice' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vp.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison) diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll index 245e8f7..058370c 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll @@ -23,10 +23,10 @@ %"class.llvm::Metadata.306.1758.9986.10470.10954.11438.11922.12406.12890.13374.13858.15310.15794.16278.17730.19182.21118.25958.26926.29346.29830.30314.30798.31282.31766.32250.32734.33702.36606.38058.41638" = type { i8, i8, i16, i32 } ; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end(ptr nocapture) #0 +declare void @llvm.lifetime.end(ptr nocapture) ; Function Attrs: nounwind ssp uwtable -define hidden void @fun(ptr %N, i1 %arg) #1 align 2 { +define hidden void @fun(ptr %N, i1 %arg) align 2 { ; CHECK: define entry: %NumOperands.i = getelementptr inbounds %"class.llvm::SDNode.310.1762.9990.10474.10958.11442.11926.12410.12894.13378.13862.15314.15798.16282.17734.19186.21122.25962.26930.29350.29834.30318.30802.31286.31770.32254.32738.33706.36610.38062.41642", ptr %N, i64 0, i32 8 @@ -47,8 +47,6 @@ for.body: ; preds = %for.body, %for.body br i1 %exitcond193, label %for.cond.cleanup, label %for.body } -attributes #0 = { argmemonly nounwind } -attributes #1 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index a7a88b8..8f9a47c 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -625,80 +625,80 @@ define i32 @fdiv(i32 %arg) { define i32 @frem(i32 %arg) { ; SSE1-LABEL: 'frem' -; SSE1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE1-NEXT: Cost Model: Found costs of 4 for: %V2F64 = frem <2 x double> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE1-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SSE2-LABEL: 'frem' -; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SSE42-LABEL: 'frem' -; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SSE42-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; AVX-LABEL: 'frem' -; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:46 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:78 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; AVX512-LABEL: 'frem' -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:47 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found costs of RThru:23 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:79 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found costs of RThru:39 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; SLM-LABEL: 'frem' -; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; SLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; SLM-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; SLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; SLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; ; GLM-LABEL: 'frem' -; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F32 = frem float undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:11 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:44 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F64 = frem double undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef -; GLM-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of 4 for: %F32 = frem float undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:19 CodeSize:4 Lat:4 SizeLat:4 for: %V4F32 = frem <4 x float> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:38 CodeSize:4 Lat:4 SizeLat:4 for: %V8F32 = frem <8 x float> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:76 CodeSize:4 Lat:4 SizeLat:4 for: %V16F32 = frem <16 x float> undef, undef +; GLM-NEXT: Cost Model: Found costs of 4 for: %F64 = frem double undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:4 Lat:4 SizeLat:4 for: %V2F64 = frem <2 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:18 CodeSize:4 Lat:4 SizeLat:4 for: %V4F64 = frem <4 x double> undef, undef +; GLM-NEXT: Cost Model: Found costs of RThru:36 CodeSize:4 Lat:4 SizeLat:4 for: %V8F64 = frem <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %F32 = frem float undef, undef diff --git a/llvm/test/Analysis/CostModel/X86/clmul.ll b/llvm/test/Analysis/CostModel/X86/clmul.ll new file mode 100644 index 0000000..1ce85f9 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/clmul.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=+pclmul | FileCheck %s --check-prefix=PCLMUL +; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-unknown-linux-gnu -mattr=-pclmul | FileCheck %s --check-prefix=NO-PCLMUL + + +define void @clmul(i128 %a128, i128 %b128, i64 %a64, i64 %b64, i32 %a32, i32 %b32, i16 %a16, i16 %b16, i8 %a8, i8 %b8) { +; PCLMUL-LABEL: 'clmul' +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i16 = call i16 @llvm.clmul.i16(i16 %a16, i16 %b16) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-PCLMUL-LABEL: 'clmul' +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %call_i16 = call i16 @llvm.clmul.i16(i16 %a16, i16 %b16) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %call_i128 = call i128 @llvm.clmul.i128(i128 %a128, i128 %b128) + %call_i64 = call i64 @llvm.clmul.i64(i64 %a64, i64 %b64) + %call_i32 = call i32 @llvm.clmul.i32(i32 %a32, i32 %b32) + %call_i16 = call i16 @llvm.clmul.i16(i16 %a16, i16 %b16) + %call_i8 = call i8 @llvm.clmul.i8(i8 %a8, i8 %b8) + ret void +} + +define void @clmul_128(<1 x i128> %a128, <1 x i128> %b128, <2 x i64> %a64, <2 x i64> %b64, <4 x i32> %a32, <4 x i32> %b32, <8 x i16> %a16, <8 x i16> %b16, <16 x i8> %a8, <16 x i8> %b8) { +; PCLMUL-LABEL: 'clmul_128' +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %call_i128 = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a128, <1 x i128> %b128) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i64 = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a64, <2 x i64> %b64) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %call_i32 = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a32, <4 x i32> %b32) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %call_i16 = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a16, <8 x i16> %b16) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %call_i8 = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a8, <16 x i8> %b8) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-PCLMUL-LABEL: 'clmul_128' +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %call_i128 = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a128, <1 x i128> %b128) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %call_i64 = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a64, <2 x i64> %b64) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %call_i32 = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a32, <4 x i32> %b32) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %call_i16 = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a16, <8 x i16> %b16) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %call_i8 = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a8, <16 x i8> %b8) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %call_i128 = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a128, <1 x i128> %b128) + %call_i64 = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a64, <2 x i64> %b64) + %call_i32 = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a32, <4 x i32> %b32) + %call_i16 = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a16, <8 x i16> %b16) + %call_i8 = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %a8, <16 x i8> %b8) + ret void +} + +define void @clmul_256(<2 x i128> %a128, <2 x i128> %b128, <4 x i64> %a64, <4 x i64> %b64, <8 x i32> %a32, <8 x i32> %b32, <16 x i16> %a16, <16 x i16> %b16, <32 x i8> %a8, <32 x i8> %b8) { +; PCLMUL-LABEL: 'clmul_256' +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %call_i128 = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a128, <2 x i128> %b128) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %call_i64 = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %a64, <4 x i64> %b64) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %call_i32 = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %a32, <8 x i32> %b32) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %call_i16 = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %a16, <16 x i16> %b16) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %call_i8 = call <32 x i8> @llvm.clmul.v32i8(<32 x i8> %a8, <32 x i8> %b8) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-PCLMUL-LABEL: 'clmul_256' +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %call_i128 = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a128, <2 x i128> %b128) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %call_i64 = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %a64, <4 x i64> %b64) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %call_i32 = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %a32, <8 x i32> %b32) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %call_i16 = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %a16, <16 x i16> %b16) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %call_i8 = call <32 x i8> @llvm.clmul.v32i8(<32 x i8> %a8, <32 x i8> %b8) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %call_i128 = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a128, <2 x i128> %b128) + %call_i64 = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %a64, <4 x i64> %b64) + %call_i32 = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %a32, <8 x i32> %b32) + %call_i16 = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %a16, <16 x i16> %b16) + %call_i8 = call <32 x i8> @llvm.clmul.v32i8(<32 x i8> %a8, <32 x i8> %b8) + ret void +} + +define void @clmul_512(<4 x i128> %a128, <4 x i128> %b128, <8 x i64> %a64, <8 x i64> %b64, <16 x i32> %a32, <16 x i32> %b32, <32 x i16> %a16, <32 x i16> %b16, <64 x i8> %a8, <64 x i8> %b8) { +; PCLMUL-LABEL: 'clmul_512' +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %call_i128 = call <4 x i128> @llvm.clmul.v4i128(<4 x i128> %a128, <4 x i128> %b128) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %call_i64 = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %a64, <8 x i64> %b64) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %call_i32 = call <16 x i32> @llvm.clmul.v16i32(<16 x i32> %a32, <16 x i32> %b32) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %call_i16 = call <32 x i16> @llvm.clmul.v32i16(<32 x i16> %a16, <32 x i16> %b16) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %call_i8 = call <64 x i8> @llvm.clmul.v64i8(<64 x i8> %a8, <64 x i8> %b8) +; PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; NO-PCLMUL-LABEL: 'clmul_512' +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %call_i128 = call <4 x i128> @llvm.clmul.v4i128(<4 x i128> %a128, <4 x i128> %b128) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 2304 for instruction: %call_i64 = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %a64, <8 x i64> %b64) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %call_i32 = call <16 x i32> @llvm.clmul.v16i32(<16 x i32> %a32, <16 x i32> %b32) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %call_i16 = call <32 x i16> @llvm.clmul.v32i16(<32 x i16> %a16, <32 x i16> %b16) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %call_i8 = call <64 x i8> @llvm.clmul.v64i8(<64 x i8> %a8, <64 x i8> %b8) +; NO-PCLMUL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %call_i128 = call <4 x i128> @llvm.clmul.v4i128(<4 x i128> %a128, <4 x i128> %b128) + %call_i64 = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %a64, <8 x i64> %b64) + %call_i32 = call <16 x i32> @llvm.clmul.v16i32(<16 x i32> %a32, <16 x i32> %b32) + %call_i16 = call <32 x i16> @llvm.clmul.v32i16(<32 x i16> %a16, <32 x i16> %b16) + %call_i8 = call <64 x i8> @llvm.clmul.v64i8(<64 x i8> %a8, <64 x i8> %b8) + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll index c2e1e92..44d3a7c 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -25,13 +25,13 @@ define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a2 ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2> -; AVX-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> -; AVX-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +; AVX-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXf64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; @@ -50,13 +50,13 @@ define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2> -; AVX-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> -; AVX-NEXT: Cost Model: Found costs of 4 for: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +; AVX-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; AVX512-LABEL: 'test_vXi64' ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2> -; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> +; AVX512-NEXT: Cost Model: Found costs of 1 for: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/vscale-insertelement-crash.ll b/llvm/test/Analysis/CostModel/X86/vscale-insertelement-crash.ll new file mode 100644 index 0000000..d28ed67 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vscale-insertelement-crash.ll @@ -0,0 +1,10 @@ +; RUN: opt -passes="print<cost-model>" -disable-output < %s +; This test triggers a crash in X86 TTI with scalable vectors + +target triple = "x86_64-unknown-linux-gnu" + +define <vscale x 1 x i64> @test(i64 %x) { +entry: + %v = insertelement <vscale x 1 x i64> poison, i64 %x, i64 0 + ret <vscale x 1 x i64> %v +} |
