diff options
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 105 | ||||
-rw-r--r-- | llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll | 76 |
2 files changed, 181 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 1ef0bad..139a0b8 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1635,6 +1635,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::vector_reduce_smax:
   case Intrinsic::vector_reduce_umin:
   case Intrinsic::vector_reduce_umax:
+  case Intrinsic::vector_extract:
+  case Intrinsic::vector_insert:
+  case Intrinsic::vector_interleave2:
+  case Intrinsic::vector_deinterleave2:
   // Target intrinsics
   case Intrinsic::amdgcn_perm:
   case Intrinsic::amdgcn_wave_reduce_umin:
@@ -3758,6 +3762,77 @@ static Constant *ConstantFoldFixedVectorCall(
     }
     return nullptr;
   }
+  case Intrinsic::vector_extract: {
+    auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
+    Constant *Vec = Operands[0];
+    if (!Idx || !isa<FixedVectorType>(Vec->getType()))
+      return nullptr;
+
+    unsigned NumElements = FVTy->getNumElements();
+    unsigned VecNumElements =
+        cast<FixedVectorType>(Vec->getType())->getNumElements();
+    unsigned StartingIndex = Idx->getZExtValue();
+
+    // Extracting entire vector is nop
+    if (NumElements == VecNumElements && StartingIndex == 0)
+      return Vec;
+
+    // Copy lanes [StartingIndex, StartingIndex + NumElements) of Vec; give up
+    // if any of them has no constant element.
+    for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
+         ++I) {
+      Constant *Elt = Vec->getAggregateElement(I);
+      if (!Elt)
+        return nullptr;
+      Result[I - StartingIndex] = Elt;
+    }
+
+    return ConstantVector::get(Result);
+  }
+  case Intrinsic::vector_insert: {
+    Constant *Vec = Operands[0];
+    Constant *SubVec = Operands[1];
+    auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
+    if (!Idx || !isa<FixedVectorType>(Vec->getType()))
+      return nullptr;
+
+    unsigned SubVecNumElements =
+        cast<FixedVectorType>(SubVec->getType())->getNumElements();
+    unsigned VecNumElements =
+        cast<FixedVectorType>(Vec->getType())->getNumElements();
+    unsigned IdxN = Idx->getZExtValue();
+    // Replacing entire vector with a subvec is nop
+    if (SubVecNumElements == VecNumElements && IdxN == 0)
+      return SubVec;
+
+    // Lanes [IdxN, IdxN + SubVecNumElements) are taken from SubVec; all other
+    // lanes, including those below IdxN, keep their value from Vec.
+    for (unsigned I = 0; I < VecNumElements; ++I) {
+      Constant *Elt;
+      if (I >= IdxN && I - IdxN < SubVecNumElements)
+        Elt = SubVec->getAggregateElement(I - IdxN);
+      else
+        Elt = Vec->getAggregateElement(I);
+      if (!Elt)
+        return nullptr;
+      Result[I] = Elt;
+    }
+    return ConstantVector::get(Result);
+  }
+  case Intrinsic::vector_interleave2: {
+    unsigned NumElements =
+        cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
+    // Result lane 2 * I comes from operand 0, lane 2 * I + 1 from operand 1.
+    for (unsigned I = 0; I < NumElements; ++I) {
+      Constant *Elt0 = Operands[0]->getAggregateElement(I);
+      Constant *Elt1 = Operands[1]->getAggregateElement(I);
+      if (!Elt0 || !Elt1)
+        return nullptr;
+      Result[2 * I] = Elt0;
+      Result[2 * I + 1] = Elt1;
+    }
+    return ConstantVector::get(Result);
+  }
   default:
     break;
   }
@@ -3919,6 +3994,36 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
       return nullptr;
     return ConstantStruct::get(StTy, SinResult, CosResult);
   }
+  case Intrinsic::vector_deinterleave2: {
+    auto *Vec = dyn_cast<Constant>(Operands[0]);
+    if (!Vec)
+      return nullptr;
+
+    auto *VecTy = cast<VectorType>(Vec->getType());
+    unsigned NumElements = VecTy->getElementCount().getKnownMinValue() / 2;
+    // A zeroinitializer input splits into two zeroinitializer halves. This is
+    // the only input folded for scalable vectors.
+    if (isa<ConstantAggregateZero>(Vec)) {
+      auto *HalfVecTy = VectorType::getHalfElementsVectorType(VecTy);
+      return ConstantStruct::get(StTy, ConstantAggregateZero::get(HalfVecTy),
+                                 ConstantAggregateZero::get(HalfVecTy));
+    }
+    // Even lanes form the first result, odd lanes the second.
+    if (isa<FixedVectorType>(Vec->getType())) {
+      SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
+      for (unsigned I = 0; I < NumElements; ++I) {
+        Constant *Elt0 = Vec->getAggregateElement(2 * I);
+        Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
+        if (!Elt0 || !Elt1)
+          return nullptr;
+        Res0[I] = Elt0;
+        Res1[I] = Elt1;
+      }
+      return ConstantStruct::get(StTy, ConstantVector::get(Res0),
+                                 ConstantVector::get(Res1));
+    }
+    return nullptr;
+  }
   default:
     // TODO: Constant folding of vector intrinsics that fall through here does
     // not work (e.g. overflow intrinsics)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
new file mode 100644
index 0000000..9dbe3d4
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
+
+define <3 x i32> @fold_vector_extract() {
+; CHECK-LABEL: define <3 x i32> @fold_vector_extract() {
+; CHECK-NEXT:    ret <3 x i32> <i32 3, i32 4, i32 5>
+;
+  %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 3)
+  ret <3 x i32> %1
+}
+
+@a = external global i16, align 1
+
+define <3 x i32> @fold_vector_extract_constexpr() {
+; CHECK-LABEL: define <3 x i32> @fold_vector_extract_constexpr() {
+; CHECK-NEXT:    ret <3 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2>
+;
+  %1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
+  ret <3 x i32> %1
+}
+
+define <8 x i32> @fold_vector_extract_nop() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() {
+; CHECK-NEXT:    ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+;
+  %1 = call <8 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_insert() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_insert() {
+; CHECK-NEXT:    ret <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 5, i32 6, i32 7, i32 8>
+;
+  %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_insert_offset() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_insert_offset() {
+; CHECK-NEXT:    ret <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 9, i32 10, i32 11, i32 12>
+;
+  %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, i64 4)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_insert_nop() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_insert_nop() {
+; CHECK-NEXT:    ret <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+;
+  %1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>, i64 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @fold_vector_interleave2() {
+; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() {
+; CHECK-NEXT:    ret <8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>
+;
+  %1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
+  ret <8 x i32> %1
+}
+
+define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() {
+; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() {
+; CHECK-NEXT:    ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
+;
+  %1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>)
+  ret {<4 x i32>, <4 x i32>} %1
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @fold_scalable_vector_deinterleave2() {
+; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave2() {
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
+;
+  %1 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<vscale x 8 x i32> zeroinitializer)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %1
+}