diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-07-13 21:07:35 -0400 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2024-07-02 21:28:02 +0200 |
commit | fd197af9717589741108795b729627658211b52a (patch) | |
tree | 75d96c076e39ca8e8ee9cd0bcff0c31668f06994 | |
parent | 5f1743cd074cc7d45744d1acc8db379513b4501c (diff) | |
download | llvm-users/arsenm/add-sub-sat-cost-model-cleanup.zip llvm-users/arsenm/add-sub-sat-cost-model-cleanup.tar.gz llvm-users/arsenm/add-sub-sat-cost-model-cleanup.tar.bz2 |
TTI: Fix special casing vectorization costs of saturating add/subusers/arsenm/add-sub-sat-cost-model-cleanup
22 files changed, 3604 insertions, 3302 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 4f1dc9f..2e0f5ee 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2129,45 +2129,17 @@ public: return Cost; } case Intrinsic::sadd_sat: - case Intrinsic::ssub_sat: { - Type *CondTy = RetTy->getWithNewBitWidth(1); - - Type *OpTy = StructType::create({RetTy, CondTy}); - Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat - ? Intrinsic::sadd_with_overflow - : Intrinsic::ssub_with_overflow; - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - - // SatMax -> Overflow && SumDiff < 0 - // SatMin -> Overflow && SumDiff >= 0 - InstructionCost Cost = 0; - IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, - nullptr, ScalarizationCostPassed); - Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, Pred, CostKind); - return Cost; - } + ISD = ISD::SADDSAT; + break; + case Intrinsic::ssub_sat: + ISD = ISD::SSUBSAT; + break; case Intrinsic::uadd_sat: - case Intrinsic::usub_sat: { - Type *CondTy = RetTy->getWithNewBitWidth(1); - - Type *OpTy = StructType::create({RetTy, CondTy}); - Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat - ? Intrinsic::uadd_with_overflow - : Intrinsic::usub_with_overflow; - - InstructionCost Cost = 0; - IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, - nullptr, ScalarizationCostPassed); - Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - return Cost; - } + ISD = ISD::UADDSAT; + break; + case Intrinsic::usub_sat: + ISD = ISD::USUBSAT; + break; case Intrinsic::smul_fix: case Intrinsic::umul_fix: { unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll index 2267d9b..d79eee5 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll @@ -33,23 +33,23 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; RECIP-LABEL: 'add' -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -59,23 +59,23 @@ define i32 @add(i32 %arg) { ; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'add' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -142,22 +142,22 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; RECIP-LABEL: 'sub' -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -167,22 +167,22 @@ define i32 @sub(i32 %arg) { ; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'sub' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll index 5a131f2..c022a79 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll @@ -32,22 +32,22 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; RECIP-LABEL: 'add' -; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -57,22 +57,22 @@ define i32 @add(i32 %arg) { ; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'add' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -138,22 +138,22 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; RECIP-LABEL: 'sub' -; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -163,22 +163,22 @@ define i32 @sub(i32 %arg) { ; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'sub' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 1ff280d..07e63e4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -691,8 +691,8 @@ define void @get_lane_mask() #0 { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) @@ -701,8 +701,8 @@ define void @get_lane_mask() #0 { ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' diff --git a/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll index cdd61c1..204f05a 100644 --- a/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll +++ b/llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll @@ -3,18 +3,18 @@ define void @get_lane_mask() { ; CHECK-LABEL: 'get_lane_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index ba1d163..804dc89 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -37,72 +37,72 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; V8M-RECIP-LABEL: 'add' ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'add' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'add' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -111,73 +111,73 @@ define i32 @add(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'add' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'add' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'add' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -243,72 +243,72 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; V8M-RECIP-LABEL: 'sub' ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'sub' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'sub' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -317,73 +317,73 @@ define i32 @sub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sub' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'sub' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'sub' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 3b755f9..3575b74 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -36,73 +36,73 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; V8M-RECIP-LABEL: 'add' -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'add' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'add' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -111,73 +111,73 @@ define i32 @add(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'add' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'add' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'add' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -242,73 +242,73 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; V8M-RECIP-LABEL: 'sub' -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-RECIP-LABEL: 'sub' -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'sub' -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -317,73 +317,73 @@ define i32 @sub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sub' -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'sub' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'sub' -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll index 541534d..cdf33a1 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll +++ b/llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll @@ -3,7 +3,7 @@ define void @sadd.sat() { ; CHECK-LABEL: 'sadd.sat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -12,7 +12,7 @@ define void @sadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) @@ -21,7 +21,7 @@ define void @sadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -30,7 +30,7 @@ define void @sadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -80,7 +80,7 @@ define void @sadd.sat() { define void @uadd.sat() { ; CHECK-LABEL: 'uadd.sat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -89,7 +89,7 @@ define void @uadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.uadd.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x i8> @llvm.uadd.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) @@ -98,7 +98,7 @@ define void @uadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 4 x i16> @llvm.uadd.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 8 x i16> @llvm.uadd.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 16 x i16> @llvm.uadd.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -107,7 +107,7 @@ define void @uadd.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 4 x i32> @llvm.uadd.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 8 x i32> @llvm.uadd.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 16 x i32> @llvm.uadd.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -157,7 +157,7 @@ define void @uadd.sat() { define void @usub.sat() { ; CHECK-LABEL: 'usub.sat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -166,7 +166,7 @@ define void @usub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.usub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x i8> @llvm.usub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) @@ -175,7 +175,7 @@ define void @usub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 4 x i16> @llvm.usub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 16 x i16> @llvm.usub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -184,7 +184,7 @@ define void @usub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 8 x i32> @llvm.usub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 16 x i32> @llvm.usub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -234,7 +234,7 @@ define void @usub.sat() { define void @ssub.sat() { ; CHECK-LABEL: 'ssub.sat' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) @@ -243,7 +243,7 @@ define void @ssub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.ssub.sat.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.ssub.sat.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x i8> @llvm.ssub.sat.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) @@ -252,7 +252,7 @@ define void @ssub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 4 x i16> @llvm.ssub.sat.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 8 x i16> @llvm.ssub.sat.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 16 x i16> @llvm.ssub.sat.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -261,7 +261,7 @@ define void @ssub.sat() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 4 x i32> @llvm.ssub.sat.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 8 x i32> @llvm.ssub.sat.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 16 x i32> @llvm.ssub.sat.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll index 21c9955..669456b 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll index 1d398d9..659e458 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll index 4d1fa7c..bdc98a3 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll index 4bbd28e..52f7514 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll @@ -34,209 +34,209 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) @@ -287,209 +287,209 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-usat-codesize.ll index e92b810..3a73145 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat-codesize.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-usat-latency.ll index 9d9e584..bd03151 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat-latency.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-usat-sizelatency.ll index 1912822..bce002c 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat-sizelatency.ll @@ -34,212 +34,212 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) @@ -287,212 +287,212 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-usat.ll b/llvm/test/Analysis/CostModel/X86/arith-usat.ll index 75950e4..6534176 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-usat.ll @@ -34,209 +34,209 @@ declare <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @add(i32 %arg) { ; SSE2-LABEL: 'add' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'add' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'add' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'add' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'add' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'add' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'add' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'add' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'add' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'add' -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'add' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) @@ -287,209 +287,209 @@ declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; SSE2-LABEL: 'sub' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sub' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sub' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sub' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sub' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sub' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sub' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sub' -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sub' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'sub' -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sub' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index 0f26092..a60f246 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" ; CHECK-COST-LABEL: sadd -; CHECK-COST: Found an estimated cost of 6 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) +; CHECK-COST: Found an estimated cost of 10 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 4 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 1 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll index 0515cce..44d17d0 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll @@ -7,8 +7,8 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK-COST-LABEL: arm_offset_q15 -; CHECK-COST: Found an estimated cost of 7 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) -; CHECK-COST: Found an estimated cost of 36 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) +; CHECK-COST: Found an estimated cost of 1 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) +; CHECK-COST: Found an estimated cost of 26 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 8 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 2 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll index 24c5fcb..1e8d45c8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll @@ -62,75 +62,60 @@ define void @add_v8i64() { ; SSE-NEXT: ret void ; ; SLM-LABEL: @add_v8i64( -; SLM-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8 -; SLM-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8 -; SLM-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; SLM-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8 -; SLM-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; SLM-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8 -; SLM-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; SLM-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8 -; SLM-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8 -; SLM-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8 -; SLM-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; SLM-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8 -; SLM-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; SLM-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8 -; SLM-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; SLM-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8 -; SLM-NEXT: [[R0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A0]], i64 [[B0]]) -; SLM-NEXT: [[R1:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A1]], i64 [[B1]]) -; SLM-NEXT: [[R2:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A2]], i64 [[B2]]) -; SLM-NEXT: [[R3:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A3]], i64 [[B3]]) -; SLM-NEXT: [[R4:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A4]], i64 [[B4]]) -; SLM-NEXT: [[R5:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A5]], i64 [[B5]]) -; SLM-NEXT: [[R6:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A6]], i64 [[B6]]) -; SLM-NEXT: [[R7:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A7]], i64 [[B7]]) -; SLM-NEXT: store i64 [[R0]], ptr @c64, align 8 -; SLM-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8 -; SLM-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; SLM-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8 -; SLM-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; SLM-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8 -; SLM-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; SLM-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8 +; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 ; SLM-NEXT: ret void ; -; AVX1-LABEL: @add_v8i64( -; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 -; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) -; AVX1-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 -; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) -; AVX1-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) -; AVX1-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; AVX1-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; AVX1-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) -; AVX1-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @add_v8i64( -; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8 -; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8 -; AVX2-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]]) -; AVX2-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8 -; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; AVX2-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]]) -; AVX2-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; AVX2-NEXT: ret void +; AVX-LABEL: @add_v8i64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; AVX-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; AVX-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; AVX-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; AVX-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; AVX-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; AVX-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; AVX-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; AVX-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; AVX-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 +; AVX-NEXT: ret void ; ; AVX512-LABEL: @add_v8i64( -; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8 -; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8 -; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]]) -; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8 +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; AVX512-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; AVX512-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; AVX512-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; AVX512-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; AVX512-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; AVX512-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; AVX512-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; AVX512-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; AVX512-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 ; AVX512-NEXT: ret void ; ; AVX256BW-LABEL: @add_v8i64( @@ -180,59 +165,271 @@ define void @add_v8i64() { define void @add_v16i32() { ; SSE-LABEL: @add_v16i32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 -; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 -; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) -; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) -; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) -; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; SSE-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; SSE-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; SSE-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; SSE-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; SSE-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; SSE-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; SSE-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; SSE-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; SSE-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; SSE-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; SSE-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; SSE-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; SSE-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; SSE-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; SSE-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A0]], i32 [[B0]]) +; SSE-NEXT: [[R1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A1]], i32 [[B1]]) +; SSE-NEXT: [[R2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A2]], i32 [[B2]]) +; SSE-NEXT: [[R3:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A3]], i32 [[B3]]) +; SSE-NEXT: [[R4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A4]], i32 [[B4]]) +; SSE-NEXT: [[R5:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A5]], i32 [[B5]]) +; SSE-NEXT: [[R6:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A6]], i32 [[B6]]) +; SSE-NEXT: [[R7:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A7]], i32 [[B7]]) +; SSE-NEXT: [[R8:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A8]], i32 [[B8]]) +; SSE-NEXT: [[R9:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A9]], i32 [[B9]]) +; SSE-NEXT: [[R10:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A10]], i32 [[B10]]) +; SSE-NEXT: [[R11:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A11]], i32 [[B11]]) +; SSE-NEXT: [[R12:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A12]], i32 [[B12]]) +; SSE-NEXT: [[R13:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A13]], i32 [[B13]]) +; SSE-NEXT: [[R14:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A14]], i32 [[B14]]) +; SSE-NEXT: [[R15:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A15]], i32 [[B15]]) +; SSE-NEXT: store i32 [[R0]], ptr @c32, align 4 +; SSE-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @add_v16i32( -; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 -; SLM-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -; SLM-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 -; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) -; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) -; SLM-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) -; SLM-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; SLM-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; SLM-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; SLM-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; SLM-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; SLM-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; SLM-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; SLM-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; SLM-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; SLM-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; SLM-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; SLM-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; SLM-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; SLM-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; SLM-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; SLM-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A0]], i32 [[B0]]) +; SLM-NEXT: [[R1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A1]], i32 [[B1]]) +; SLM-NEXT: [[R2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A2]], i32 [[B2]]) +; SLM-NEXT: [[R3:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A3]], i32 [[B3]]) +; SLM-NEXT: [[R4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A4]], i32 [[B4]]) +; SLM-NEXT: [[R5:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A5]], i32 [[B5]]) +; SLM-NEXT: [[R6:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A6]], i32 [[B6]]) +; SLM-NEXT: [[R7:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A7]], i32 [[B7]]) +; SLM-NEXT: [[R8:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A8]], i32 [[B8]]) +; SLM-NEXT: [[R9:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A9]], i32 [[B9]]) +; SLM-NEXT: [[R10:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A10]], i32 [[B10]]) +; SLM-NEXT: [[R11:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A11]], i32 [[B11]]) +; SLM-NEXT: [[R12:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A12]], i32 [[B12]]) +; SLM-NEXT: [[R13:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A13]], i32 [[B13]]) +; SLM-NEXT: [[R14:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A14]], i32 [[B14]]) +; SLM-NEXT: [[R15:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A15]], i32 [[B15]]) +; SLM-NEXT: store i32 [[R0]], ptr @c32, align 4 +; SLM-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @add_v16i32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4 -; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) -; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4 -; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]]) -; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; AVX-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; AVX-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; AVX-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; AVX-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; AVX-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; AVX-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; AVX-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; AVX-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; AVX-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; AVX-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; AVX-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; AVX-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; AVX-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; AVX-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; AVX-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; AVX-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; AVX-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; AVX-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; AVX-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; AVX-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; AVX-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; AVX-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; AVX-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; AVX-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; AVX-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; AVX-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; AVX-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; AVX-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; AVX-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; AVX-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; AVX-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; AVX-NEXT: [[R0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A0]], i32 [[B0]]) +; AVX-NEXT: [[R1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A1]], i32 [[B1]]) +; AVX-NEXT: [[R2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A2]], i32 [[B2]]) +; AVX-NEXT: [[R3:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A3]], i32 [[B3]]) +; AVX-NEXT: [[R4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A4]], i32 [[B4]]) +; AVX-NEXT: [[R5:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A5]], i32 [[B5]]) +; AVX-NEXT: [[R6:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A6]], i32 [[B6]]) +; AVX-NEXT: [[R7:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A7]], i32 [[B7]]) +; AVX-NEXT: [[R8:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A8]], i32 [[B8]]) +; AVX-NEXT: [[R9:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A9]], i32 [[B9]]) +; AVX-NEXT: [[R10:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A10]], i32 [[B10]]) +; AVX-NEXT: [[R11:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A11]], i32 [[B11]]) +; AVX-NEXT: [[R12:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A12]], i32 [[B12]]) +; AVX-NEXT: [[R13:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A13]], i32 [[B13]]) +; AVX-NEXT: [[R14:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A14]], i32 [[B14]]) +; AVX-NEXT: [[R15:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A15]], i32 [[B15]]) +; AVX-NEXT: store i32 [[R0]], ptr @c32, align 4 +; AVX-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; AVX-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; AVX-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; AVX-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; AVX-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; AVX-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; AVX-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; AVX-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; AVX-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; AVX-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; AVX-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; AVX-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; AVX-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; AVX-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @add_v16i32( -; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4 -; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) -; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4 +; AVX512-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; AVX512-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; AVX512-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; AVX512-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; AVX512-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; AVX512-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; AVX512-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; AVX512-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; AVX512-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; AVX512-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; AVX512-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; AVX512-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; AVX512-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; AVX512-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; AVX512-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; AVX512-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; AVX512-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; AVX512-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; AVX512-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; AVX512-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; AVX512-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; AVX512-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; AVX512-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; AVX512-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; AVX512-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; AVX512-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; AVX512-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; AVX512-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; AVX512-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; AVX512-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; AVX512-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; AVX512-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; AVX512-NEXT: [[R0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A0]], i32 [[B0]]) +; AVX512-NEXT: [[R1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A1]], i32 [[B1]]) +; AVX512-NEXT: [[R2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A2]], i32 [[B2]]) +; AVX512-NEXT: [[R3:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A3]], i32 [[B3]]) +; AVX512-NEXT: [[R4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A4]], i32 [[B4]]) +; AVX512-NEXT: [[R5:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A5]], i32 [[B5]]) +; AVX512-NEXT: [[R6:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A6]], i32 [[B6]]) +; AVX512-NEXT: [[R7:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A7]], i32 [[B7]]) +; AVX512-NEXT: [[R8:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A8]], i32 [[B8]]) +; AVX512-NEXT: [[R9:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A9]], i32 [[B9]]) +; AVX512-NEXT: [[R10:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A10]], i32 [[B10]]) +; AVX512-NEXT: [[R11:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A11]], i32 [[B11]]) +; AVX512-NEXT: [[R12:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A12]], i32 [[B12]]) +; AVX512-NEXT: [[R13:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A13]], i32 [[B13]]) +; AVX512-NEXT: [[R14:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A14]], i32 [[B14]]) +; AVX512-NEXT: [[R15:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A15]], i32 [[B15]]) +; AVX512-NEXT: store i32 [[R0]], ptr @c32, align 4 +; AVX512-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; AVX512-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; AVX512-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; AVX512-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; AVX512-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; AVX512-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; AVX512-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; AVX512-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX512-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; AVX512-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; AVX512-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; AVX512-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; AVX512-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; AVX512-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; AVX512-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; AVX512-NEXT: ret void ; %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 @@ -805,3 +1002,6 @@ define void @add_v64i8() { store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX1: {{.*}} +; AVX2: {{.*}} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll index fab022d69..913a3aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll @@ -26,41 +26,6 @@ declare i16 @llvm.uadd.sat.i16(i16, i16) declare i8 @llvm.uadd.sat.i8 (i8 , i8 ) define void @add_v8i64() { -; SSE-LABEL: @add_v8i64( -; SSE-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8 -; SSE-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8 -; SSE-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; SSE-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8 -; SSE-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; SSE-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8 -; SSE-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; SSE-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8 -; SSE-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8 -; SSE-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8 -; SSE-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; SSE-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8 -; SSE-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; SSE-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8 -; SSE-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; SSE-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8 -; SSE-NEXT: [[R0:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A0]], i64 [[B0]]) -; SSE-NEXT: [[R1:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A1]], i64 [[B1]]) -; SSE-NEXT: [[R2:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A2]], i64 [[B2]]) -; SSE-NEXT: [[R3:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A3]], i64 [[B3]]) -; SSE-NEXT: [[R4:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A4]], i64 [[B4]]) -; SSE-NEXT: [[R5:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A5]], i64 [[B5]]) -; SSE-NEXT: [[R6:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A6]], i64 [[B6]]) -; SSE-NEXT: [[R7:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A7]], i64 [[B7]]) -; SSE-NEXT: store i64 [[R0]], ptr @c64, align 8 -; SSE-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8 -; SSE-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; SSE-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8 -; SSE-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; SSE-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8 -; SSE-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; SSE-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8 -; SSE-NEXT: ret void -; ; AVX-LABEL: @add_v8i64( ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8 @@ -115,25 +80,6 @@ define void @add_v8i64() { } define void @add_v16i32() { -; SSE-LABEL: @add_v16i32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 -; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 -; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) -; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) -; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) -; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 -; SSE-NEXT: ret void -; ; AVX-LABEL: @add_v16i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll index afaab8b..4aa02c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll @@ -62,75 +62,60 @@ define void @sub_v8i64() { ; SSE-NEXT: ret void ; ; SLM-LABEL: @sub_v8i64( -; SLM-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8 -; SLM-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8 -; SLM-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; SLM-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8 -; SLM-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; SLM-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8 -; SLM-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; SLM-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8 -; SLM-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8 -; SLM-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8 -; SLM-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; SLM-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8 -; SLM-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; SLM-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8 -; SLM-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; SLM-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8 -; SLM-NEXT: [[R0:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A0]], i64 [[B0]]) -; SLM-NEXT: [[R1:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A1]], i64 [[B1]]) -; SLM-NEXT: [[R2:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A2]], i64 [[B2]]) -; SLM-NEXT: [[R3:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A3]], i64 [[B3]]) -; SLM-NEXT: [[R4:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A4]], i64 [[B4]]) -; SLM-NEXT: [[R5:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A5]], i64 [[B5]]) -; SLM-NEXT: [[R6:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A6]], i64 [[B6]]) -; SLM-NEXT: [[R7:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A7]], i64 [[B7]]) -; SLM-NEXT: store i64 [[R0]], ptr @c64, align 8 -; SLM-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8 -; SLM-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; SLM-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8 -; SLM-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; SLM-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8 -; SLM-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; SLM-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8 +; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 ; SLM-NEXT: ret void ; -; AVX1-LABEL: @sub_v8i64( -; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 -; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) -; AVX1-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 -; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) -; AVX1-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) -; AVX1-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; AVX1-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; AVX1-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; AVX1-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) -; AVX1-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; AVX1-NEXT: ret void -; -; AVX2-LABEL: @sub_v8i64( -; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8 -; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8 -; AVX2-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]]) -; AVX2-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8 -; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; AVX2-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]]) -; AVX2-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; AVX2-NEXT: ret void +; AVX-LABEL: @sub_v8i64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; AVX-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; AVX-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; AVX-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; AVX-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; AVX-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; AVX-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; AVX-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; AVX-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; AVX-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; AVX-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 +; AVX-NEXT: ret void ; ; AVX512-LABEL: @sub_v8i64( -; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8 -; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8 -; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]]) -; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8 +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; AVX512-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; AVX512-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; AVX512-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; AVX512-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; AVX512-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; AVX512-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; AVX512-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; AVX512-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; AVX512-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; AVX512-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; AVX512-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 ; AVX512-NEXT: ret void ; ; AVX256BW-LABEL: @sub_v8i64( @@ -180,59 +165,271 @@ define void @sub_v8i64() { define void @sub_v16i32() { ; SSE-LABEL: @sub_v16i32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 -; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 -; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) -; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) -; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) -; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; SSE-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; SSE-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; SSE-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; SSE-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; SSE-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; SSE-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; SSE-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; SSE-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; SSE-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; SSE-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; SSE-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; SSE-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; SSE-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; SSE-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; SSE-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; SSE-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; SSE-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; SSE-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; SSE-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; SSE-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; SSE-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; SSE-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; SSE-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; SSE-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; SSE-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; SSE-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; SSE-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; SSE-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; SSE-NEXT: [[R0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A0]], i32 [[B0]]) +; SSE-NEXT: [[R1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A1]], i32 [[B1]]) +; SSE-NEXT: [[R2:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A2]], i32 [[B2]]) +; SSE-NEXT: [[R3:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A3]], i32 [[B3]]) +; SSE-NEXT: [[R4:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A4]], i32 [[B4]]) +; SSE-NEXT: [[R5:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A5]], i32 [[B5]]) +; SSE-NEXT: [[R6:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A6]], i32 [[B6]]) +; SSE-NEXT: [[R7:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A7]], i32 [[B7]]) +; SSE-NEXT: [[R8:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A8]], i32 [[B8]]) +; SSE-NEXT: [[R9:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A9]], i32 [[B9]]) +; SSE-NEXT: [[R10:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A10]], i32 [[B10]]) +; SSE-NEXT: [[R11:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A11]], i32 [[B11]]) +; SSE-NEXT: [[R12:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A12]], i32 [[B12]]) +; SSE-NEXT: [[R13:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A13]], i32 [[B13]]) +; SSE-NEXT: [[R14:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A14]], i32 [[B14]]) +; SSE-NEXT: [[R15:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A15]], i32 [[B15]]) +; SSE-NEXT: store i32 [[R0]], ptr @c32, align 4 +; SSE-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; SSE-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; SSE-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; SSE-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; SSE-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; SSE-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; SSE-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; SSE-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; SSE-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; SSE-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SSE-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; SSE-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; SSE-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @sub_v16i32( -; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 -; SLM-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) -; SLM-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 -; SLM-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) -; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) -; SLM-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) -; SLM-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; SLM-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; SLM-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; SLM-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; SLM-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; SLM-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; SLM-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; SLM-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; SLM-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; SLM-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; SLM-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; SLM-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; SLM-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; SLM-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; SLM-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; SLM-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; SLM-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; SLM-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; SLM-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; SLM-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; SLM-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; SLM-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; SLM-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; SLM-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; SLM-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; SLM-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; SLM-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; SLM-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; SLM-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; SLM-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; SLM-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; SLM-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; SLM-NEXT: [[R0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A0]], i32 [[B0]]) +; SLM-NEXT: [[R1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A1]], i32 [[B1]]) +; SLM-NEXT: [[R2:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A2]], i32 [[B2]]) +; SLM-NEXT: [[R3:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A3]], i32 [[B3]]) +; SLM-NEXT: [[R4:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A4]], i32 [[B4]]) +; SLM-NEXT: [[R5:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A5]], i32 [[B5]]) +; SLM-NEXT: [[R6:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A6]], i32 [[B6]]) +; SLM-NEXT: [[R7:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A7]], i32 [[B7]]) +; SLM-NEXT: [[R8:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A8]], i32 [[B8]]) +; SLM-NEXT: [[R9:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A9]], i32 [[B9]]) +; SLM-NEXT: [[R10:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A10]], i32 [[B10]]) +; SLM-NEXT: [[R11:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A11]], i32 [[B11]]) +; SLM-NEXT: [[R12:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A12]], i32 [[B12]]) +; SLM-NEXT: [[R13:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A13]], i32 [[B13]]) +; SLM-NEXT: [[R14:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A14]], i32 [[B14]]) +; SLM-NEXT: [[R15:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A15]], i32 [[B15]]) +; SLM-NEXT: store i32 [[R0]], ptr @c32, align 4 +; SLM-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; SLM-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; SLM-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; SLM-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; SLM-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; SLM-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; SLM-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; SLM-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; SLM-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; SLM-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; SLM-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; SLM-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SLM-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; SLM-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; SLM-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @sub_v16i32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4 -; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) -; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4 -; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]]) -; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; AVX-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; AVX-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; AVX-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; AVX-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; AVX-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; AVX-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; AVX-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; AVX-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; AVX-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; AVX-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; AVX-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; AVX-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; AVX-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; AVX-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; AVX-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; AVX-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; AVX-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; AVX-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; AVX-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; AVX-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; AVX-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; AVX-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; AVX-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; AVX-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; AVX-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; AVX-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; AVX-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; AVX-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; AVX-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; AVX-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; AVX-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; AVX-NEXT: [[R0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A0]], i32 [[B0]]) +; AVX-NEXT: [[R1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A1]], i32 [[B1]]) +; AVX-NEXT: [[R2:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A2]], i32 [[B2]]) +; AVX-NEXT: [[R3:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A3]], i32 [[B3]]) +; AVX-NEXT: [[R4:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A4]], i32 [[B4]]) +; AVX-NEXT: [[R5:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A5]], i32 [[B5]]) +; AVX-NEXT: [[R6:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A6]], i32 [[B6]]) +; AVX-NEXT: [[R7:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A7]], i32 [[B7]]) +; AVX-NEXT: [[R8:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A8]], i32 [[B8]]) +; AVX-NEXT: [[R9:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A9]], i32 [[B9]]) +; AVX-NEXT: [[R10:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A10]], i32 [[B10]]) +; AVX-NEXT: [[R11:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A11]], i32 [[B11]]) +; AVX-NEXT: [[R12:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A12]], i32 [[B12]]) +; AVX-NEXT: [[R13:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A13]], i32 [[B13]]) +; AVX-NEXT: [[R14:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A14]], i32 [[B14]]) +; AVX-NEXT: [[R15:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A15]], i32 [[B15]]) +; AVX-NEXT: store i32 [[R0]], ptr @c32, align 4 +; AVX-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; AVX-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; AVX-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; AVX-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; AVX-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; AVX-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; AVX-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; AVX-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; AVX-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; AVX-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; AVX-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; AVX-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; AVX-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; AVX-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @sub_v16i32( -; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4 -; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) -; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4 +; AVX512-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 +; AVX512-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 +; AVX512-NEXT: [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4 +; AVX512-NEXT: [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4 +; AVX512-NEXT: [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; AVX512-NEXT: [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4 +; AVX512-NEXT: [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4 +; AVX512-NEXT: [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4 +; AVX512-NEXT: [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; AVX512-NEXT: [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4 +; AVX512-NEXT: [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 +; AVX512-NEXT: [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 +; AVX512-NEXT: [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; AVX512-NEXT: [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 +; AVX512-NEXT: [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 +; AVX512-NEXT: [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 +; AVX512-NEXT: [[B0:%.*]] = load i32, ptr @b32, align 4 +; AVX512-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 +; AVX512-NEXT: [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4 +; AVX512-NEXT: [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4 +; AVX512-NEXT: [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; AVX512-NEXT: [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4 +; AVX512-NEXT: [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4 +; AVX512-NEXT: [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4 +; AVX512-NEXT: [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; AVX512-NEXT: [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4 +; AVX512-NEXT: [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 +; AVX512-NEXT: [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 +; AVX512-NEXT: [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; AVX512-NEXT: [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 +; AVX512-NEXT: [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 +; AVX512-NEXT: [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 +; AVX512-NEXT: [[R0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A0]], i32 [[B0]]) +; AVX512-NEXT: [[R1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A1]], i32 [[B1]]) +; AVX512-NEXT: [[R2:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A2]], i32 [[B2]]) +; AVX512-NEXT: [[R3:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A3]], i32 [[B3]]) +; AVX512-NEXT: [[R4:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A4]], i32 [[B4]]) +; AVX512-NEXT: [[R5:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A5]], i32 [[B5]]) +; AVX512-NEXT: [[R6:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A6]], i32 [[B6]]) +; AVX512-NEXT: [[R7:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A7]], i32 [[B7]]) +; AVX512-NEXT: [[R8:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A8]], i32 [[B8]]) +; AVX512-NEXT: [[R9:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A9]], i32 [[B9]]) +; AVX512-NEXT: [[R10:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A10]], i32 [[B10]]) +; AVX512-NEXT: [[R11:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A11]], i32 [[B11]]) +; AVX512-NEXT: [[R12:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A12]], i32 [[B12]]) +; AVX512-NEXT: [[R13:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A13]], i32 [[B13]]) +; AVX512-NEXT: [[R14:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A14]], i32 [[B14]]) +; AVX512-NEXT: [[R15:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A15]], i32 [[B15]]) +; AVX512-NEXT: store i32 [[R0]], ptr @c32, align 4 +; AVX512-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4 +; AVX512-NEXT: store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4 +; AVX512-NEXT: store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4 +; AVX512-NEXT: store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; AVX512-NEXT: store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4 +; AVX512-NEXT: store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4 +; AVX512-NEXT: store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4 +; AVX512-NEXT: store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX512-NEXT: store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4 +; AVX512-NEXT: store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 +; AVX512-NEXT: store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 +; AVX512-NEXT: store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; AVX512-NEXT: store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 +; AVX512-NEXT: store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 +; AVX512-NEXT: store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 ; AVX512-NEXT: ret void ; %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 @@ -805,3 +1002,6 @@ define void @sub_v64i8() { store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX1: {{.*}} +; AVX2: {{.*}} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll index 3510863..2638b68 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll @@ -27,38 +27,22 @@ declare i8 @llvm.usub.sat.i8 (i8 , i8 ) define void @sub_v8i64() { ; SSE-LABEL: @sub_v8i64( -; SSE-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8 -; SSE-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8 -; SSE-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 -; SSE-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8 -; SSE-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 -; SSE-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8 -; SSE-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 -; SSE-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8 -; SSE-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8 -; SSE-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8 -; SSE-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 -; SSE-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8 -; SSE-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 -; SSE-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8 -; SSE-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 -; SSE-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8 -; SSE-NEXT: [[R0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A0]], i64 [[B0]]) -; SSE-NEXT: [[R1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A1]], i64 [[B1]]) -; SSE-NEXT: [[R2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A2]], i64 [[B2]]) -; SSE-NEXT: [[R3:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A3]], i64 [[B3]]) -; SSE-NEXT: [[R4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A4]], i64 [[B4]]) -; SSE-NEXT: [[R5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A5]], i64 [[B5]]) -; SSE-NEXT: [[R6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A6]], i64 [[B6]]) -; SSE-NEXT: [[R7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A7]], i64 [[B7]]) -; SSE-NEXT: store i64 [[R0]], ptr @c64, align 8 -; SSE-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8 -; SSE-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 -; SSE-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8 -; SSE-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 -; SSE-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8 -; SSE-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 -; SSE-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8 +; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8 +; SSE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; SSE-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8 +; SSE-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 +; SSE-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 +; SSE-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]]) +; SSE-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; SSE-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; SSE-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) +; SSE-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 +; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 +; SSE-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) +; SSE-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sub_v8i64( |