diff options
36 files changed, 668 insertions, 221 deletions
diff --git a/.github/workflows/build-ci-container-tooling.yml b/.github/workflows/build-ci-container-tooling.yml index 42db470..c77c7861 100644 --- a/.github/workflows/build-ci-container-tooling.yml +++ b/.github/workflows/build-ci-container-tooling.yml @@ -72,7 +72,7 @@ jobs: - name: Test Container run: | # Use --pull=never to ensure we are testing the just built image. - podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-format-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-format --version | grep version && black --version | grep black' + podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-format-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-format --version | grep version && git-clang-format -h | grep usage && black --version | grep black' podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-lint-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-tidy --version | grep version && clang-tidy-diff.py -h | grep usage' push-ci-container: diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile index 7a5d8a3..7d64562 100644 --- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile +++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile @@ -10,7 +10,8 @@ RUN apt-get update && \ tar -xvJf llvm.tar.xz -C /llvm-extract \ # Only unpack these tools to save space on Github runner. 
LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-tidy \ - LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format && \ + LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format \ + LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format && \ rm llvm.tar.xz @@ -35,7 +36,9 @@ RUN apt-get update && \ FROM base AS ci-container-code-format ARG LLVM_VERSION -COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format ${LLVM_SYSROOT}/bin/clang-format +COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format \ + /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format \ + ${LLVM_SYSROOT}/bin/ ENV PATH=${LLVM_SYSROOT}/bin:${PATH} diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index 3fb8560..bfdf9cb 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -89,7 +89,8 @@ static void fixGenericExprCastToBool(DiagnosticBuilder &Diag, const Expr *SubExpr = Cast->getSubExpr(); - bool NeedInnerParens = utils::fixit::areParensNeededForStatement(*SubExpr); + bool NeedInnerParens = + utils::fixit::areParensNeededForStatement(*SubExpr->IgnoreImpCasts()); bool NeedOuterParens = Parent != nullptr && utils::fixit::areParensNeededForStatement(*Parent); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 216d3f5..33cc401 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -402,6 +402,11 @@ Changes in existing checks declarations and macros in system headers. The documentation is also improved to differentiate the general options from the specific ones. 
+- Improved :doc:`readability-implicit-bool-conversion + <clang-tidy/checks/readability/implicit-bool-conversion>` check by correctly + adding parentheses when the inner expression are implicitly converted + multiple times. + - Improved :doc:`readability-qualified-auto <clang-tidy/checks/readability/qualified-auto>` check by adding the option `IgnoreAliasing`, that allows not looking at underlying types of type aliases. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp index f3e8bf0..a0e1fd3 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp @@ -547,3 +547,13 @@ namespace PR71848 { // CHECK-FIXES: return static_cast<int>( foo ); } } + +namespace PR161318 { + int AddParenOutsideOfCompoundAssignOp() { + int val = -1; + while(val >>= 7) { + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: implicit conversion 'int' -> 'bool' [readability-implicit-bool-conversion] + // CHECK-FIXES: while((val >>= 7) != 0) { + } + } +} diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 008de0d..a515cbf 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -79,6 +79,7 @@ public: UbuntuOracular, UbuntuPlucky, UbuntuQuesting, + UbuntuResolute, UnknownDistro }; @@ -130,7 +131,7 @@ public: } bool IsUbuntu() const { - return DistroVal >= UbuntuMaverick && DistroVal <= UbuntuQuesting; + return DistroVal >= UbuntuMaverick && DistroVal <= UbuntuResolute; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 922d679..3811fb0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1633,8 +1633,8 
@@ static bool interp__builtin_elementwise_countzeroes(InterpState &S, const InterpFrame *Frame, const CallExpr *Call, unsigned BuiltinID) { - const bool HasZeroArg = Call->getNumArgs() == 2; - const bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg; + bool HasZeroArg = Call->getNumArgs() == 2; + bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg; assert(Call->getNumArgs() == 1 || HasZeroArg); if (Call->getArg(0)->getType()->isIntegerType()) { PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); @@ -2447,18 +2447,18 @@ interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E, const Pointer &Dst = S.Stk.peek<Pointer>(); const ASTContext &ASTCtx = S.getASTContext(); - const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType()); - const unsigned LHSVecLen = VT0->getNumElements(); - const unsigned SrcPerLane = 128 / SrcBits; - const unsigned Lanes = LHSVecLen * SrcBits / 128; + unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType()); + unsigned LHSVecLen = VT0->getNumElements(); + unsigned SrcPerLane = 128 / SrcBits; + unsigned Lanes = LHSVecLen * SrcBits / 128; PrimType SrcT = *S.getContext().classify(VT0->getElementType()); PrimType DstT = *S.getContext().classify(getElemType(Dst)); - const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType(); + bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType(); for (unsigned Lane = 0; Lane != Lanes; ++Lane) { - const unsigned BaseSrc = Lane * SrcPerLane; - const unsigned BaseDst = Lane * (2 * SrcPerLane); + unsigned BaseSrc = Lane * SrcPerLane; + unsigned BaseDst = Lane * (2 * SrcPerLane); for (unsigned I = 0; I != SrcPerLane; ++I) { INT_TYPE_SWITCH_NO_BOOL(SrcT, { @@ -2596,9 +2596,9 @@ static bool interp__builtin_elementwise_triop_fp( FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); llvm::RoundingMode RM = getRoundingMode(FPO); - const QualType Arg1Type = Call->getArg(0)->getType(); - const QualType Arg2Type = 
Call->getArg(1)->getType(); - const QualType Arg3Type = Call->getArg(2)->getType(); + QualType Arg1Type = Call->getArg(0)->getType(); + QualType Arg2Type = Call->getArg(1)->getType(); + QualType Arg3Type = Call->getArg(2)->getType(); // Non-vector floating point types. if (!Arg1Type->isVectorType()) { @@ -2621,16 +2621,16 @@ static bool interp__builtin_elementwise_triop_fp( assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() && Arg3Type->isVectorType()); - const VectorType *VecT = Arg1Type->castAs<VectorType>(); - const QualType ElemT = VecT->getElementType(); - unsigned NumElems = VecT->getNumElements(); + const VectorType *VecTy = Arg1Type->castAs<VectorType>(); + QualType ElemQT = VecTy->getElementType(); + unsigned NumElems = VecTy->getNumElements(); - assert(ElemT == Arg2Type->castAs<VectorType>()->getElementType() && - ElemT == Arg3Type->castAs<VectorType>()->getElementType()); + assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() && + ElemQT == Arg3Type->castAs<VectorType>()->getElementType()); assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() && NumElems == Arg3Type->castAs<VectorType>()->getNumElements()); - assert(ElemT->isRealFloatingType()); - (void)ElemT; + assert(ElemQT->isRealFloatingType()); + (void)ElemQT; const Pointer &VZ = S.Stk.pop<Pointer>(); const Pointer &VY = S.Stk.pop<Pointer>(); @@ -2775,7 +2775,7 @@ static bool interp__builtin_elementwise_triop( } const auto *VecT = Arg0Type->castAs<VectorType>(); - const PrimType &ElemT = *S.getContext().classify(VecT->getElementType()); + PrimType ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); @@ -2847,9 +2847,9 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, unsigned Lane = static_cast<unsigned>(Index % NumLanes); unsigned InsertPos = Lane * SubElements; - PrimType ElemPT = 
BaseVec.getFieldDesc()->getPrimType(); + PrimType ElemT = BaseVec.getFieldDesc()->getPrimType(); - TYPE_SWITCH(ElemPT, { + TYPE_SWITCH(ElemT, { for (unsigned I = 0; I != BaseElements; ++I) Dst.elem<T>(I) = BaseVec.elem<T>(I); for (unsigned I = 0; I != SubElements; ++I) @@ -2872,12 +2872,12 @@ static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek<Pointer>(); unsigned DstLen = A.getNumElems(); - const QualType ElemQT = getElemType(A); - const OptPrimType ElemPT = S.getContext().classify(ElemQT); + QualType ElemQT = getElemType(A); + OptPrimType ElemT = S.getContext().classify(ElemQT); unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType(); - INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + INT_TYPE_SWITCH_NO_BOOL(*ElemT, { for (unsigned I = 0; I != DstLen; ++I) { APInt ALane = A.elem<T>(I).toAPSInt(); APInt BLane = B.elem<T>(I).toAPSInt(); @@ -2916,13 +2916,13 @@ static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC, unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1)); - PrimType ElemPT = Vec.getFieldDesc()->getPrimType(); + PrimType ElemT = Vec.getFieldDesc()->getPrimType(); // FIXME(#161685): Replace float+int split with a numeric-only type switch - if (ElemPT == PT_Float) { + if (ElemT == PT_Float) { S.Stk.push<Floating>(Vec.elem<Floating>(Index)); return true; } - INT_TYPE_SWITCH_NO_BOOL(ElemPT, { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { APSInt V = Vec.elem<T>(Index).toAPSInt(); pushInteger(S, V, Call->getType()); }); @@ -2947,8 +2947,8 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1)); - PrimType ElemPT = Base.getFieldDesc()->getPrimType(); - INT_TYPE_SWITCH_NO_BOOL(ElemPT, { + PrimType ElemT = Base.getFieldDesc()->getPrimType(); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { for (unsigned I = 0; I != NumElems; ++I) Dst.elem<T>(I) = 
Base.elem<T>(I); Dst.elem<T>(Index) = static_cast<T>(ValAPS); diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 8a5a9fc..838e087 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -92,6 +92,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("oracular", Distro::UbuntuOracular) .Case("plucky", Distro::UbuntuPlucky) .Case("questing", Distro::UbuntuQuesting) + .Case("resolute", Distro::UbuntuResolute) .Default(Distro::UnknownDistro); return Version; } diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 9cbd1bd..7c44efd 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -614,8 +614,7 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( for (unsigned I = 0, MappedIndex = 0; I < Used.size(); I++) { TemplateArgument Arg; if (Used[I]) - Arg = S.Context.getCanonicalTemplateArgument( - CTAI.SugaredConverted[MappedIndex++]); + Arg = CTAI.SugaredConverted[MappedIndex++]; if (I < SubstitutedOuterMost.size()) { SubstitutedOuterMost[I] = Arg; Offset = I + 1; diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp index af2dce8..5f1243a 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp @@ -149,7 +149,7 @@ namespace std_example { template<typename T> constexpr bool is_same_v<T, T> = true; template<typename T, typename U> concept same_as = is_same_v<T, U>; - // expected-note@-1 {{because 'is_same_v<int, typename std_example::T2::inner>' evaluated to false}} + // expected-note@-1 {{because 'is_same_v<int, typename T2::inner>' evaluated to false}} static_assert(C1<int>); static_assert(C1<int*>); @@ -160,7 +160,7 @@ namespace std_example { template<typename T> concept C2 = requires(T x) { {*x} -> same_as<typename T::inner>; 
- // expected-note@-1{{because 'same_as<int, typename std_example::T2::inner>' evaluated to false}} + // expected-note@-1{{because 'same_as<int, typename T2::inner>' evaluated to false}} // expected-note@-2{{because '*x' would be invalid: indirection requires pointer operand ('int' invalid)}} }; diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp index 70a96be..9fc4906 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp @@ -27,7 +27,7 @@ using r4i = X<void>::r4<int>; // expected-error{{constraints not satisfied for c // C++ [expr.prim.req.nested] Examples namespace std_example { - template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(int) == 1' (4 == 1) evaluated to false}} + template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(decltype(+t)) == 1' (4 == 1) evaluated to false}} template<typename T> concept D = requires (T t) { requires C1<decltype (+t)>; // expected-note{{because 'decltype(+t)' (aka 'int') does not satisfy 'C1'}} diff --git a/clang/test/CXX/temp/temp.param/p10-2a.cpp b/clang/test/CXX/temp/temp.param/p10-2a.cpp index c0406f8..4f192d3 100644 --- a/clang/test/CXX/temp/temp.param/p10-2a.cpp +++ b/clang/test/CXX/temp/temp.param/p10-2a.cpp @@ -95,8 +95,8 @@ concept OneOf = (is_same_v<T, Ts> || ...); // #OneOf // expected-note@#OneOf 3{{because 'is_same_v<int, char[1]>' evaluated to false}} // expected-note@#OneOf 3{{and 'is_same_v<int, char[2]>' evaluated to false}} // expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' evaluated to false}} -// expected-note@#OneOf {{because 'is_same_v<std::nullptr_t, char>' evaluated to false}} -// expected-note@#OneOf {{and 'is_same_v<std::nullptr_t, int>' evaluated to false}} +// expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' 
evaluated to false}} +// expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}} // expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}} template<OneOf<char[1], char[2]> T, OneOf<int, long, char> U> diff --git a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl index 999372c..3f0a37d 100644 --- a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl @@ -20,7 +20,7 @@ Buffer<double2> r4; // expected-error@+4 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires __is_typed_resource_element_compatible<element_type> class Buffer}} // expected-note@*:* {{because 'Buffer<int>' does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::Buffer<int>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(Buffer<int>)' evaluated to false}} Buffer<Buffer<int> > r5; struct s { @@ -66,7 +66,7 @@ Buffer<half[4]> r10; typedef vector<int, 8> int8; // expected-error@+3 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}} Buffer<int8> r11; typedef int MyInt; @@ -91,7 +91,7 @@ Buffer<numbers> r15; // expected-error@+3 {{constraints not satisfied for class template 'Buffer'}} // expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because 
'__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}} Buffer<double3> r16; diff --git a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl index b33f2af..aa36c48 100644 --- a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl @@ -20,7 +20,7 @@ RWBuffer<double2> r4; // expected-error@+4 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires __is_typed_resource_element_compatible<element_type> class RWBuffer}} // expected-note@*:* {{because 'RWBuffer<int>' does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::RWBuffer<int>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(RWBuffer<int>)' evaluated to false}} RWBuffer<RWBuffer<int> > r5; struct s { @@ -66,7 +66,7 @@ RWBuffer<half[4]> r10; typedef vector<int, 8> int8; // expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}} RWBuffer<int8> r11; typedef int MyInt; @@ -91,7 +91,7 @@ RWBuffer<numbers> r15; // expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}} // expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}} -// expected-note@*:* {{because 
'__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}} +// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}} RWBuffer<double3> r16; diff --git a/clang/test/SemaTemplate/concepts-recursive-inst.cpp b/clang/test/SemaTemplate/concepts-recursive-inst.cpp index 73dce93..d36c6a8 100644 --- a/clang/test/SemaTemplate/concepts-recursive-inst.cpp +++ b/clang/test/SemaTemplate/concepts-recursive-inst.cpp @@ -68,8 +68,8 @@ struct my_range{ void baz() { auto it = begin(rng); // #BEGIN_CALL // expected-error-re@#INF_REQ {{satisfaction of constraint {{.*}} depends on itself}} -// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<DirectRecursiveCheck::my_range>' requested here}} -// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<DirectRecursiveCheck::my_range>' required here}} +// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<struct my_range>' requested here}} +// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<struct my_range>' required here}} // expected-note@#INF_BEGIN_EXPR {{while substituting deduced template arguments into function template 'begin'}} // expected-note@#INF_BEGIN_EXPR {{in instantiation of requirement here}} // expected-note@#INF_REQ {{while substituting template arguments into constraint expression here}} diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index 3fbe7c0..ee2bb8d 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -833,13 +833,13 @@ struct Parent { static_assert(Parent<void>::TakesUnary<int, 0>::i == 0); // expected-error@+3{{constraints not satisfied for class template 'TakesUnary'}} // expected-note@#UNARY{{because 'decltype(0ULL)' (aka 'unsigned long long') does not satisfy 'C'}} -// expected-note@#61777_C{{because 
'sizeof(unsigned long long) == 4' (8 == 4) evaluated to false}} +// expected-note@#61777_C{{because 'sizeof(decltype(0ULL)) == 4' (8 == 4) evaluated to false}} static_assert(Parent<void>::TakesUnary<int, 0uLL>::i == 0); static_assert(Parent<int>::TakesBinary<int, 0>::i == 0); // expected-error@+3{{constraints not satisfied for class template 'TakesBinary'}} // expected-note@#BINARY{{because 'C2<decltype(0ULL), int>' evaluated to false}} -// expected-note@#61777_C2{{because 'sizeof(unsigned long long) == sizeof(int)' (8 == 4) evaluated to false}} +// expected-note@#61777_C2{{because 'sizeof(decltype(0ULL)) == sizeof(int)' (8 == 4) evaluated to false}} static_assert(Parent<int>::TakesBinary<int, 0ULL>::i == 0); } @@ -1329,8 +1329,8 @@ static_assert(__cpp17_iterator<not_move_constructible>); \ // expected-error {{static assertion failed}} \ // expected-note {{because 'not_move_constructible' does not satisfy '__cpp17_iterator'}} \ // expected-note@#__cpp17_copy_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_copy_constructible'}} \ -// expected-note@#__cpp17_move_constructible {{because 'parameter_mapping_regressions::case3::not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \ -// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<parameter_mapping_regressions::case3::not_move_constructible>' evaluated to false}} +// expected-note@#__cpp17_move_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \ +// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<not_move_constructible>' evaluated to false}} } namespace case4 { diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 7b66177..d9087dd1 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -38,11 +38,6 @@ namespace pointer_union_detail { return 
std::min<int>({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...}); } - /// Find the first type in a list of types. - template <typename T, typename...> struct GetFirstType { - using type = T; - }; - /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion /// for the template arguments. template <typename ...PTs> class PointerUnionUIntTraits { @@ -264,8 +259,7 @@ struct PointerLikeTypeTraits<PointerUnion<PTs...>> { // Teach DenseMap how to use PointerUnions as keys. template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> { using Union = PointerUnion<PTs...>; - using FirstInfo = - DenseMapInfo<typename pointer_union_detail::GetFirstType<PTs...>::type>; + using FirstInfo = DenseMapInfo<TypeAtIndex<0, PTs...>>; static inline Union getEmptyKey() { return Union(FirstInfo::getEmptyKey()); } diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 5b20d6bd..658f262 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -161,12 +161,10 @@ using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>; /// Helper which adds two underlying types of enumeration type. /// Implicit conversion to a common type is accepted. 
template <typename EnumTy1, typename EnumTy2, - typename UT1 = std::enable_if_t<std::is_enum<EnumTy1>::value, - std::underlying_type_t<EnumTy1>>, - typename UT2 = std::enable_if_t<std::is_enum<EnumTy2>::value, - std::underlying_type_t<EnumTy2>>> + typename = std::enable_if_t<std::is_enum_v<EnumTy1> && + std::is_enum_v<EnumTy2>>> constexpr auto addEnumValues(EnumTy1 LHS, EnumTy2 RHS) { - return static_cast<UT1>(LHS) + static_cast<UT2>(RHS); + return llvm::to_underlying(LHS) + llvm::to_underlying(RHS); } //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index ce969ef..ae446df 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -86,6 +86,7 @@ #include <type_traits> // std::is_integral, std::is_enum, std::underlying_type, // std::enable_if +#include "llvm/ADT/STLForwardCompat.h" // llvm::to_underlying #include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow namespace llvm { @@ -139,8 +140,7 @@ struct CheckedInt { template <typename Enum, std::enable_if_t<std::is_enum<Enum>::value, bool> = 0> static CheckedInt from(Enum FromValue) { - using type = std::underlying_type_t<Enum>; - return from<type>(static_cast<type>(FromValue)); + return from(llvm::to_underlying(FromValue)); } // Equality diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index d47f6c0..39dc7c1 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -222,6 +222,14 @@ public: LLVM_ABI ConstantFPRange cast(const fltSemantics &DstSem, APFloat::roundingMode RM = APFloat::rmNearestTiesToEven) const; + + /// Return a new range representing the possible values resulting + /// from an addition of a value in this range and a value in \p Other. 
+ LLVM_ABI ConstantFPRange add(const ConstantFPRange &Other) const; + + /// Return a new range representing the possible values resulting + /// from a subtraction of a value in this range and a value in \p Other. + LLVM_ABI ConstantFPRange sub(const ConstantFPRange &Other) const; }; inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) { diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 070e833..51d2e21 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -414,15 +414,31 @@ ConstantFPRange ConstantFPRange::negate() const { return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN); } +/// Return true if the finite part is not empty after removing infinities. +static bool removeInf(APFloat &Lower, APFloat &Upper, bool &HasPosInf, + bool &HasNegInf) { + assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan && + "Non-NaN part is empty."); + auto &Sem = Lower.getSemantics(); + if (Lower.isNegInfinity()) { + Lower = APFloat::getLargest(Sem, /*Negative=*/true); + HasNegInf = true; + } + if (Upper.isPosInfinity()) { + Upper = APFloat::getLargest(Sem, /*Negative=*/false); + HasPosInf = true; + } + return strictCompare(Lower, Upper) != APFloat::cmpGreaterThan; +} + ConstantFPRange ConstantFPRange::getWithoutInf() const { if (isNaNOnly()) return *this; APFloat NewLower = Lower; APFloat NewUpper = Upper; - if (Lower.isNegInfinity()) - NewLower = APFloat::getLargest(getSemantics(), /*Negative=*/true); - if (Upper.isPosInfinity()) - NewUpper = APFloat::getLargest(getSemantics(), /*Negative=*/false); + bool UnusedFlag; + removeInf(NewLower, NewUpper, /*HasPosInf=*/UnusedFlag, + /*HasNegInf=*/UnusedFlag); canonicalizeRange(NewLower, NewUpper); return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN, MayBeSNaN); @@ -444,3 +460,49 @@ ConstantFPRange ConstantFPRange::cast(const fltSemantics &DstSem, /*MayBeQNaNVal=*/MayBeQNaN || MayBeSNaN, /*MayBeSNaNVal=*/false); } 
+ +ConstantFPRange ConstantFPRange::add(const ConstantFPRange &Other) const { + bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) || + ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet()); + if (isNaNOnly() || Other.isNaNOnly()) + return getNaNOnly(getSemantics(), /*MayBeQNaN=*/ResMayBeQNaN, + /*MayBeSNaN=*/false); + bool LHSHasNegInf = false, LHSHasPosInf = false; + APFloat LHSLower = Lower, LHSUpper = Upper; + bool LHSFiniteIsNonEmpty = + removeInf(LHSLower, LHSUpper, LHSHasPosInf, LHSHasNegInf); + bool RHSHasNegInf = false, RHSHasPosInf = false; + APFloat RHSLower = Other.Lower, RHSUpper = Other.Upper; + bool RHSFiniteIsNonEmpty = + removeInf(RHSLower, RHSUpper, RHSHasPosInf, RHSHasNegInf); + // -inf + +inf = QNaN + ResMayBeQNaN |= + (LHSHasNegInf && RHSHasPosInf) || (LHSHasPosInf && RHSHasNegInf); + // +inf + finite/+inf = +inf, -inf + finite/-inf = -inf + bool HasNegInf = (LHSHasNegInf && (RHSFiniteIsNonEmpty || RHSHasNegInf)) || + (RHSHasNegInf && (LHSFiniteIsNonEmpty || LHSHasNegInf)); + bool HasPosInf = (LHSHasPosInf && (RHSFiniteIsNonEmpty || RHSHasPosInf)) || + (RHSHasPosInf && (LHSFiniteIsNonEmpty || LHSHasPosInf)); + if (LHSFiniteIsNonEmpty && RHSFiniteIsNonEmpty) { + APFloat NewLower = + HasNegInf ? APFloat::getInf(LHSLower.getSemantics(), /*Negative=*/true) + : LHSLower + RHSLower; + APFloat NewUpper = + HasPosInf ? APFloat::getInf(LHSUpper.getSemantics(), /*Negative=*/false) + : LHSUpper + RHSUpper; + return ConstantFPRange(NewLower, NewUpper, ResMayBeQNaN, + /*MayBeSNaN=*/false); + } + // If both HasNegInf and HasPosInf are false, the non-NaN part is empty. + // We just return the canonical form [+inf, -inf] for the empty non-NaN set. 
+ return ConstantFPRange( + APFloat::getInf(Lower.getSemantics(), /*Negative=*/HasNegInf), + APFloat::getInf(Upper.getSemantics(), /*Negative=*/!HasPosInf), + ResMayBeQNaN, + /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::sub(const ConstantFPRange &Other) const { + // fsub X, Y = fadd X, (fneg Y) + return add(Other.negate()); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fbce3b0..6965116 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19093,7 +19093,8 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) { SDValue Ext1 = Op1.getOperand(0); if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR || Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR || - Ext0.getOperand(0) != Ext1.getOperand(0)) + Ext0.getOperand(0) != Ext1.getOperand(0) || + Ext0.getOperand(0).getValueType().isScalableVector()) return SDValue(); // Check that the type is twice the add types, and the extract are from // upper/lower parts of the same source. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b8761d97..30dfcf2b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5064,17 +5064,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool RenamableSrc) const { if (AArch64::GPR32spRegClass.contains(DestReg) && (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 
- MCRegister DestRegX = TRI->getMatchingSuperReg( - DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); - MCRegister SrcRegX = TRI->getMatchingSuperReg( - SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper @@ -5097,14 +5095,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(DestRegX.isValid() && "Destination super-reg not valid"); MCRegister SrcRegX = SrcReg == AArch64::WZR ? AArch64::XZR - : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(SrcRegX.isValid() && "Source super-reg not valid"); // This instruction is reading and writing X registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate @@ -5334,11 +5332,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5359,11 +5356,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5374,11 +5370,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5398,11 +5393,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5413,11 +5407,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5441,11 +5434,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR64() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. 
This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5456,11 +5448,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5532,9 +5523,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } #ifndef NDEBUG - const TargetRegisterInfo &TRI = getRegisterInfo(); - errs() << TRI.getRegAsmName(DestReg) << " = COPY " - << TRI.getRegAsmName(SrcReg) << "\n"; + errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg) + << "\n"; #endif llvm_unreachable("unimplemented reg-to-reg copy"); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 456fa4c..7651ba1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -322,7 +322,11 @@ public: VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) { - return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy)); + VPIRFlags Flags; + if (Opcode == Instruction::Trunc) + Flags = 
VPIRFlags::TruncFlagsTy(false, false); + return tryInsertInstruction( + new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 50136a8..b96d29e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8774,13 +8774,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( assert(!RecurrenceDescriptor::isMinMaxRecurrenceKind(RecurrenceKind) && "Unexpected truncated min-max recurrence!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = - new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); + VPWidenCastRecipe *Trunc; Instruction::CastOps ExtendOpc = RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt; - auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy); - Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); - Extnd->insertAfter(Trunc); + VPWidenCastRecipe *Extnd; + { + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint( + NewExitingVPV->getDefiningRecipe()->getParent(), + std::next(NewExitingVPV->getDefiningRecipe()->getIterator())); + Trunc = + Builder.createWidenCast(Instruction::Trunc, NewExitingVPV, RdxTy); + Extnd = Builder.createWidenCast(ExtendOpc, Trunc, PhiTy); + } if (PhiR->getOperand(1) == NewExitingVPV) PhiR->setOperand(1, Extnd->getVPSingleValue()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7563cd7..9bb8820 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1026,6 +1026,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { PredPHI->replaceAllUsesWith(Op); } + VPBuilder Builder(Def); VPValue *A; if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) { Type *TruncTy = TypeInfo.inferScalarType(Def); @@ -1041,18 +1042,16 
@@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue())) ? Instruction::SExt : Instruction::ZExt; - auto *VPC = - new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A, + TruncTy); if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { // UnderlyingExt has distinct return type, used to retain legacy cost. - VPC->setUnderlyingValue(UnderlyingExt); + Ext->setUnderlyingValue(UnderlyingExt); } - VPC->insertBefore(&R); - Def->replaceAllUsesWith(VPC); + Def->replaceAllUsesWith(Ext); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { - auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); - VPC->insertBefore(&R); - Def->replaceAllUsesWith(VPC); + auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy); + Def->replaceAllUsesWith(Trunc); } } #ifndef NDEBUG @@ -1098,7 +1097,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(Def->getOperand(1)); // (x && y) || (x && z) -> x && (y || z) - VPBuilder Builder(Def); if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) && // Simplify only if one of the operands has one use to avoid creating an @@ -2206,20 +2204,20 @@ void VPlanTransforms::truncateToMinimalBitwidths( continue; assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate"); auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op); - VPWidenCastRecipe *NewOp = - IterIsEmpty - ? 
new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy, - VPIRFlags::TruncFlagsTy(false, false)) - : ProcessedIter->second; - R.setOperand(Idx, NewOp); - if (!IterIsEmpty) + if (!IterIsEmpty) { + R.setOperand(Idx, ProcessedIter->second); continue; - ProcessedIter->second = NewOp; - if (!Op->isLiveIn()) { - NewOp->insertBefore(&R); - } else { - PH->appendRecipe(NewOp); } + + VPBuilder Builder; + if (Op->isLiveIn()) + Builder.setInsertPoint(PH); + else + Builder.setInsertPoint(&R); + VPWidenCastRecipe *NewOp = + Builder.createWidenCast(Instruction::Trunc, Op, NewResTy); + ProcessedIter->second = NewOp; + R.setOperand(Idx, NewOp); } } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index ee04e41..2d0df56 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -4775,6 +4775,39 @@ entry: ret i32 %z5 } +define i64 @extract_scalable(<2 x i32> %0) "target-features"="+sve2" { +; CHECK-SD-LABEL: extract_scalable: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v1.2s, #1 +; CHECK-SD-NEXT: ptrue p0.s, vl2 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-SD-NEXT: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-SD-NEXT: saddl v0.2d, v0.2s, v0.2s +; CHECK-SD-NEXT: addp d0, v0.2d +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: extract_scalable: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov w9, s0 +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: mov w10, v0.s[1] +; CHECK-GI-NEXT: sdiv w9, w8, w9 +; CHECK-GI-NEXT: sdiv w8, w8, w10 +; CHECK-GI-NEXT: fmov s0, w9 +; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v0.2s +; CHECK-GI-NEXT: addp d0, v0.2d +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: ret + %2 = sdiv <2 x i32> splat (i32 1), %0 + %3 = sext <2 x i32> %2 to <2 x i64> + %4 = add <2 x i64> %3, %3 + %5 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %4) + ret 
i64 %5 +} + declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1 declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>) declare i16 @llvm.vector.reduce.add.v24i16(<24 x i16>) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 000dc4a..232c354 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -44,8 +44,86 @@ loop: ; preds = %loop, %entry exit: ; preds = %loop ret void } + +; Test case for https://github.com/llvm/llvm-project/issues/162374. +define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) { +; CHECK-LABEL: define void @truncate_i16_to_i8_cse( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4294967296, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> 
[[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = trunc <vscale x 8 x i16> [[BROADCAST_SPLAT]] to <vscale x 8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 8 x i8> [[TMP6]], i32 [[TMP9]] +; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1 +; CHECK-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4294967296, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[COUNT_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[SRC]], align 2 +; CHECK-NEXT: [[VAL_ZEXT:%.*]] = zext i16 [[VAL]] to i64 +; CHECK-NEXT: [[VAL_TRUNC_ZEXT:%.*]] = trunc i64 [[VAL_ZEXT]] to i8 +; CHECK-NEXT: store i8 [[VAL_TRUNC_ZEXT]], ptr null, align 1 +; CHECK-NEXT: [[VAL_TRUNC:%.*]] = trunc i16 [[VAL]] to i8 +; CHECK-NEXT: store i8 [[VAL_TRUNC]], ptr [[DST]], align 1 +; CHECK-NEXT: [[COUNT_NEXT]] = add i32 [[COUNT]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[COUNT_NEXT]], 0 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; 
CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %count = phi i32 [ 0, %entry ], [ %count.next, %loop ] + %val = load i16, ptr %src, align 2 + %val.zext = zext i16 %val to i64 + %val.trunc.zext = trunc i64 %val.zext to i8 + store i8 %val.trunc.zext, ptr null, align 1 + %val.trunc = trunc i16 %val to i8 + store i8 %val.trunc, ptr %dst, align 1 + %count.next = add i32 %count, 1 + %exitcond = icmp eq i32 %count.next, 0 + %iv.next = add i64 %iv, 1 + br i1 %exitcond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index cb16032..1533906 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -1,46 +1,59 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4IC1 %s +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF2IC2 %s define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr noalias %B, ptr noalias %C) { -; CHECK-LABEL: define void @narrow_select_to_single_scalar( -; CHECK-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 -; CHECK-NEXT: store i16 [[TMP7]], ptr [[B]], align 1 -; 
CHECK-NEXT: store i16 0, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i16 [[IV]] -; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 1 -; CHECK-NEXT: store i16 [[L_0]], ptr [[B]], align 1 -; CHECK-NEXT: [[INVAR_SEL:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1 -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i16 [[INVAR_SEL]] -; CHECK-NEXT: store i16 0, ptr [[GEP_C]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ne i16 [[IV]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; VF4IC1-LABEL: define void @narrow_select_to_single_scalar( +; VF4IC1-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; VF4IC1-NEXT: [[ENTRY:.*:]] +; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]] +; VF4IC1: [[VECTOR_PH]]: +; VF4IC1-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1 +; VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]] +; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4IC1: [[VECTOR_BODY]]: +; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 +; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1 +; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 
[[OFFSET_IDX]], 2 +; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3 +; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]] +; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 +; VF4IC1-NEXT: store i16 [[TMP7]], ptr [[B]], align 1 +; VF4IC1-NEXT: store i16 0, ptr [[TMP1]], align 1 +; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF4IC1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; VF4IC1-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4IC1: [[MIDDLE_BLOCK]]: +; VF4IC1-NEXT: br label %[[EXIT:.*]] +; VF4IC1: [[EXIT]]: +; VF4IC1-NEXT: ret void +; +; VF2IC2-LABEL: define void @narrow_select_to_single_scalar( +; VF2IC2-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1 +; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]] +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2 +; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3 +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]] +; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1 +; VF2IC2-NEXT: store i16 [[TMP5]], ptr [[B]], align 1 +; VF2IC2-NEXT: store i16 0, ptr [[TMP1]], align 1 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br label %[[EXIT:.*]] +; VF2IC2: 
[[EXIT]]: +; VF2IC2-NEXT: ret void ; entry: br label %loop.header @@ -54,15 +67,88 @@ loop.header: %gep.C = getelementptr i16, ptr %C, i16 %invar.sel store i16 0, ptr %gep.C, align 1 %iv.next = add i16 %iv, 1 - %ec = icmp ne i16 %iv, 1024 + %ec = icmp ne i16 %iv.next, 1024 br i1 %ec, label %loop.header, label %exit exit: ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. + +; FIXME: Currently this mis-compiled when interleaving; all stores store the +; last lane of the last part, instead of the last lane per part. +; Test case for https://github.com/llvm/llvm-project/issues/162498. +define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(ptr %dst) { +; VF4IC1-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( +; VF4IC1-SAME: ptr [[DST:%.*]]) { +; VF4IC1-NEXT: [[ENTRY:.*:]] +; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]] +; VF4IC1: [[VECTOR_PH]]: +; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4IC1: [[VECTOR_BODY]]: +; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 +; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; VF4IC1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[VEC_IND]], splat (i32 1) +; VF4IC1-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP5]] +; VF4IC1-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 +; VF4IC1-NEXT: [[TMP8:%.*]] = 
getelementptr i32, ptr [[DST]], i32 [[TMP7]] +; VF4IC1-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 +; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]] +; VF4IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]] +; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4 +; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 +; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4 +; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; VF4IC1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; VF4IC1-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4IC1: [[MIDDLE_BLOCK]]: +; VF4IC1-NEXT: br label %[[EXIT:.*]] +; VF4IC1: [[EXIT]]: +; VF4IC1-NEXT: ret void +; +; VF2IC2-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( +; VF2IC2-SAME: ptr [[DST:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2 +; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1 +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]] +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]] +; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 +; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 
[[INDEX_NEXT]], 100 +; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br label %[[EXIT:.*]] +; VF2IC2: [[EXIT]]: +; VF2IC2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.shift = lshr i32 %iv, 1 + %gep.dst = getelementptr i32, ptr %dst, i32 %iv.shift + store i32 %iv, ptr %gep.dst, align 4 + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 58a65b9..cf9b31c 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -22,6 +22,7 @@ protected: static ConstantFPRange Full; static ConstantFPRange Empty; static ConstantFPRange Finite; + static ConstantFPRange NonNaN; static ConstantFPRange One; static ConstantFPRange PosZero; static ConstantFPRange NegZero; @@ -44,6 +45,8 @@ ConstantFPRange ConstantFPRangeTest::Empty = ConstantFPRange::getEmpty(APFloat::IEEEdouble()); ConstantFPRange ConstantFPRangeTest::Finite = ConstantFPRange::getFinite(APFloat::IEEEdouble()); +ConstantFPRange ConstantFPRangeTest::NonNaN = + ConstantFPRange::getNonNaN(APFloat::IEEEdouble()); ConstantFPRange ConstantFPRangeTest::One = ConstantFPRange(APFloat(1.0)); ConstantFPRange ConstantFPRangeTest::PosZero = ConstantFPRange( APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false)); @@ -79,15 +82,21 @@ static void strictNext(APFloat &V) { V.next(/*nextDown=*/false); } +enum class SparseLevel { + Dense, + SpecialValuesWithAllPowerOfTwos, + SpecialValuesOnly, +}; + template <typename Fn> -static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive, +static void EnumerateConstantFPRangesImpl(Fn TestFn, SparseLevel Level, bool MayBeQNaN, bool MayBeSNaN) { const fltSemantics &Sem = APFloat::Float8E4M3(); 
APFloat PosInf = APFloat::getInf(Sem, /*Negative=*/false); APFloat NegInf = APFloat::getInf(Sem, /*Negative=*/true); TestFn(ConstantFPRange(PosInf, NegInf, MayBeQNaN, MayBeSNaN)); - if (!Exhaustive) { + if (Level != SparseLevel::Dense) { SmallVector<APFloat, 36> Values; Values.push_back(APFloat::getInf(Sem, /*Negative=*/true)); Values.push_back(APFloat::getLargest(Sem, /*Negative=*/true)); @@ -95,10 +104,13 @@ static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive, unsigned Exponents = APFloat::semanticsMaxExponent(Sem) - APFloat::semanticsMinExponent(Sem) + 3; unsigned MantissaBits = APFloat::semanticsPrecision(Sem) - 1; - // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent) - for (unsigned M = Exponents - 2; M != 0; --M) - Values.push_back( - APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits))); + if (Level == SparseLevel::SpecialValuesWithAllPowerOfTwos) { + // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent) + for (unsigned M = Exponents - 2; M != 0; --M) + Values.push_back( + APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits))); + } + Values.push_back(APFloat::getSmallestNormalized(Sem, /*Negative=*/true)); Values.push_back(APFloat::getSmallest(Sem, /*Negative=*/true)); Values.push_back(APFloat::getZero(Sem, /*Negative=*/true)); size_t E = Values.size(); @@ -127,26 +139,30 @@ static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive, } template <typename Fn> -static void EnumerateConstantFPRanges(Fn TestFn, bool Exhaustive) { - EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, +static void EnumerateConstantFPRanges(Fn TestFn, SparseLevel Level, + bool IgnoreSNaNs = false) { + EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); - EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false, - /*MayBeSNaN=*/true); - EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + 
EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/true, /*MayBeSNaN=*/false); - EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true, + if (IgnoreSNaNs) + return; + EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/false, + /*MayBeSNaN=*/true); + EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/true, /*MayBeSNaN=*/true); } template <typename Fn> static void EnumerateTwoInterestingConstantFPRanges(Fn TestFn, - bool Exhaustive) { + SparseLevel Level) { EnumerateConstantFPRanges( [&](const ConstantFPRange &CR1) { EnumerateConstantFPRanges( - [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Exhaustive); + [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Level, + /*IgnoreSNaNs=*/true); }, - Exhaustive); + Level, /*IgnoreSNaNs=*/true); } template <typename Fn> @@ -348,16 +364,25 @@ TEST_F(ConstantFPRangeTest, ExhaustivelyEnumerate) { constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); unsigned Count = 0; EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, - /*Exhaustive=*/true); + SparseLevel::Dense); EXPECT_EQ(Expected, Count); } TEST_F(ConstantFPRangeTest, Enumerate) { - constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 4); + constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 5); constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); unsigned Count = 0; EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, - /*Exhaustive=*/false); + SparseLevel::SpecialValuesWithAllPowerOfTwos); + EXPECT_EQ(Expected, Count); +} + +TEST_F(ConstantFPRangeTest, EnumerateWithSpecialValuesOnly) { + constexpr unsigned NNaNValues = 2 * 5; + constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); + unsigned Count = 0; + EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; }, + SparseLevel::SpecialValuesOnly); EXPECT_EQ(Expected, Count); } @@ -459,7 +484,7 @@ TEST_F(ConstantFPRangeTest, FPClassify) { EXPECT_EQ(SignBit, 
CR.getSignBit()) << CR; EXPECT_EQ(Mask, CR.classify()) << CR; }, - /*Exhaustive=*/true); + SparseLevel::Dense); #endif } @@ -560,7 +585,7 @@ TEST_F(ConstantFPRangeTest, makeAllowedFCmpRegion) { << "Suboptimal result for makeAllowedFCmpRegion(" << Pred << ", " << CR << ")"; }, - /*Exhaustive=*/false); + SparseLevel::SpecialValuesWithAllPowerOfTwos); } #endif } @@ -671,7 +696,7 @@ TEST_F(ConstantFPRangeTest, makeSatisfyingFCmpRegion) { << ", " << CR << ")"; } }, - /*Exhaustive=*/false); + SparseLevel::SpecialValuesWithAllPowerOfTwos); } #endif } @@ -804,13 +829,13 @@ TEST_F(ConstantFPRangeTest, negate) { } TEST_F(ConstantFPRangeTest, getWithout) { - EXPECT_EQ(Full.getWithoutNaN(), ConstantFPRange::getNonNaN(Sem)); + EXPECT_EQ(Full.getWithoutNaN(), NonNaN); EXPECT_EQ(NaN.getWithoutNaN(), Empty); EXPECT_EQ(NaN.getWithoutInf(), NaN); EXPECT_EQ(PosInf.getWithoutInf(), Empty); EXPECT_EQ(NegInf.getWithoutInf(), Empty); - EXPECT_EQ(ConstantFPRange::getNonNaN(Sem).getWithoutInf(), Finite); + EXPECT_EQ(NonNaN.getWithoutInf(), Finite); EXPECT_EQ(Zero.getWithoutInf(), Zero); EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), APFloat(3.0)) @@ -925,4 +950,119 @@ TEST_F(ConstantFPRangeTest, cast) { /*IgnoreNaNPayload=*/true); } +TEST_F(ConstantFPRangeTest, add) { + EXPECT_EQ(Full.add(Full), NonNaN.unionWith(QNaN)); + EXPECT_EQ(Full.add(Empty), Empty); + EXPECT_EQ(Empty.add(Full), Empty); + EXPECT_EQ(Empty.add(Empty), Empty); + EXPECT_EQ(One.add(One), ConstantFPRange(APFloat(2.0))); + EXPECT_EQ(Some.add(Some), + ConstantFPRange::getNonNaN(APFloat(-6.0), APFloat(6.0))); + EXPECT_EQ(SomePos.add(SomeNeg), + ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(3.0))); + EXPECT_EQ(PosInf.add(PosInf), PosInf); + EXPECT_EQ(NegInf.add(NegInf), NegInf); + EXPECT_EQ(PosInf.add(Finite.unionWith(PosInf)), PosInf); + EXPECT_EQ(NegInf.add(Finite.unionWith(NegInf)), NegInf); + EXPECT_EQ(PosInf.add(Finite.unionWith(NegInf)), PosInf.unionWith(QNaN)); + 
EXPECT_EQ(NegInf.add(Finite.unionWith(PosInf)), NegInf.unionWith(QNaN)); + EXPECT_EQ(PosInf.add(NegInf), QNaN); + EXPECT_EQ(NegInf.add(PosInf), QNaN); + EXPECT_EQ(PosZero.add(NegZero), PosZero); + EXPECT_EQ(PosZero.add(Zero), PosZero); + EXPECT_EQ(NegZero.add(NegZero), NegZero); + EXPECT_EQ(NegZero.add(Zero), Zero); + EXPECT_EQ(NaN.add(NaN), QNaN); + EXPECT_EQ(NaN.add(Finite), QNaN); + EXPECT_EQ(NonNaN.unionWith(NaN).add(NonNaN), NonNaN.unionWith(QNaN)); + EXPECT_EQ(PosInf.unionWith(QNaN).add(PosInf), PosInf.unionWith(QNaN)); + EXPECT_EQ(PosInf.unionWith(NaN).add(ConstantFPRange(APFloat(24.0))), + PosInf.unionWith(QNaN)); + +#if defined(EXPENSIVE_CHECKS) + EnumerateTwoInterestingConstantFPRanges( + [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) { + ConstantFPRange Res = LHS.add(RHS); + ConstantFPRange Expected = + ConstantFPRange::getEmpty(LHS.getSemantics()); + EnumerateValuesInConstantFPRange( + LHS, + [&](const APFloat &LHSC) { + EnumerateValuesInConstantFPRange( + RHS, + [&](const APFloat &RHSC) { + APFloat Sum = LHSC + RHSC; + EXPECT_TRUE(Res.contains(Sum)) + << "Wrong result for " << LHS << " + " << RHS + << ". The result " << Res << " should contain " << Sum; + if (!Expected.contains(Sum)) + Expected = Expected.unionWith(ConstantFPRange(Sum)); + }, + /*IgnoreNaNPayload=*/true); + }, + /*IgnoreNaNPayload=*/true); + EXPECT_EQ(Res, Expected) + << "Suboptimal result for " << LHS << " + " << RHS << ". 
Expected "
+          << Expected << ", but got " << Res;
+      },
+      SparseLevel::SpecialValuesOnly);
+#endif
+}
+
+TEST_F(ConstantFPRangeTest, sub) {
+  EXPECT_EQ(Full.sub(Full), NonNaN.unionWith(QNaN));
+  EXPECT_EQ(Full.sub(Empty), Empty);
+  EXPECT_EQ(Empty.sub(Full), Empty);
+  EXPECT_EQ(Empty.sub(Empty), Empty);
+  EXPECT_EQ(One.sub(One), ConstantFPRange(APFloat(0.0)));
+  EXPECT_EQ(Some.sub(Some),
+            ConstantFPRange::getNonNaN(APFloat(-6.0), APFloat(6.0)));
+  EXPECT_EQ(SomePos.sub(SomeNeg),
+            ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(6.0)));
+  EXPECT_EQ(PosInf.sub(NegInf), PosInf);
+  EXPECT_EQ(NegInf.sub(PosInf), NegInf);
+  EXPECT_EQ(PosInf.sub(Finite.unionWith(NegInf)), PosInf);
+  EXPECT_EQ(NegInf.sub(Finite.unionWith(PosInf)), NegInf);
+  EXPECT_EQ(PosInf.sub(Finite.unionWith(PosInf)), PosInf.unionWith(QNaN));
+  EXPECT_EQ(NegInf.sub(Finite.unionWith(NegInf)), NegInf.unionWith(QNaN));
+  EXPECT_EQ(PosInf.sub(PosInf), QNaN);
+  EXPECT_EQ(NegInf.sub(NegInf), QNaN);
+  EXPECT_EQ(PosZero.sub(NegZero), PosZero);
+  EXPECT_EQ(PosZero.sub(Zero), PosZero);
+  EXPECT_EQ(NegZero.sub(NegZero), PosZero);
+  EXPECT_EQ(NegZero.sub(PosZero), NegZero);
+  EXPECT_EQ(NegZero.sub(Zero), Zero);
+  EXPECT_EQ(NaN.sub(NaN), QNaN);
+  EXPECT_EQ(NaN.sub(Finite), QNaN);
+
+#if defined(EXPENSIVE_CHECKS)
+  EnumerateTwoInterestingConstantFPRanges(
+      [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) {
+        ConstantFPRange Res = LHS.sub(RHS);
+        ConstantFPRange Expected =
+            ConstantFPRange::getEmpty(LHS.getSemantics());
+        EnumerateValuesInConstantFPRange(
+            LHS,
+            [&](const APFloat &LHSC) {
+              EnumerateValuesInConstantFPRange(
+                  RHS,
+                  [&](const APFloat &RHSC) {
+                    APFloat Diff = LHSC - RHSC;
+                    EXPECT_TRUE(Res.contains(Diff))
+                        << "Wrong result for " << LHS << " - " << RHS
+                        << ". 
The result " << Res << " should contain " << Diff; + if (!Expected.contains(Diff)) + Expected = Expected.unionWith(ConstantFPRange(Diff)); + }, + /*IgnoreNaNPayload=*/true); + }, + /*IgnoreNaNPayload=*/true); + EXPECT_EQ(Res, Expected) + << "Suboptimal result for " << LHS << " - " << RHS << ". Expected " + << Expected << ", but got " << Res; + }, + SparseLevel::SpecialValuesOnly); +#endif +} + } // anonymous namespace diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp index ddee023..d274458 100644 --- a/llvm/unittests/Support/JobserverTest.cpp +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -355,6 +355,7 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { int CurrentActive = ++ActiveTasks; LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive << "\n"); + (void)i; int OldMax = MaxActiveTasks.load(); while (CurrentActive > OldMax) MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 2f2df68..913ba69 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -85,7 +85,8 @@ class LLVMConfig(object): "HWASAN_SYMBOLIZER_PATH", "MSAN_SYMBOLIZER_PATH", "TSAN_SYMBOLIZER_PATH", - "UBSAN_SYMBOLIZER_PATH" "ASAN_OPTIONS", + "UBSAN_SYMBOLIZER_PATH", + "ASAN_OPTIONS", "HWASAN_OPTIONS", "MSAN_OPTIONS", "RTSAN_OPTIONS", diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index 17371ec..6d54bb6 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -23,6 +23,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Operation.h" +#include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -180,6 +181,15 @@ void RawBufferAtomicUMinOp::print(mlir::OpAsmPrinter &p) { // ROCDLDialect 
initialization, type parsing, and registration. //===----------------------------------------------------------------------===// +namespace { +struct ROCDLInlinerInterface final : DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + bool isLegalToInline(Operation *, Region *, bool, IRMapping &) const final { + return true; + } +}; +} // namespace + // TODO: This should be the llvm.rocdl dialect once this is supported. void ROCDLDialect::initialize() { addOperations< @@ -194,6 +204,7 @@ void ROCDLDialect::initialize() { // Support unknown operations because not all ROCDL operations are registered. allowUnknownOperations(); + addInterfaces<ROCDLInlinerInterface>(); declarePromisedInterface<gpu::TargetAttrInterface, ROCDLTargetAttr>(); } diff --git a/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir new file mode 100644 index 0000000..7fd97ef --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt %s --inline | FileCheck %s + +llvm.func @threadidx() -> i32 { + %tid = rocdl.workitem.id.x : i32 + llvm.return %tid : i32 +} + +// CHECK-LABEL: func @caller +llvm.func @caller() -> i32 { + // CHECK-NOT: llvm.call @threadidx + // CHECK: rocdl.workitem.id.x + %z = llvm.call @threadidx() : () -> (i32) + llvm.return %z : i32 +} diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index a1950cb..69725e7 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -757,7 +757,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, if (!AttachInfo.NewAllocations.empty()) { DP("Tracked %u total new allocations:\n", (unsigned)AttachInfo.NewAllocations.size()); - for (const auto &Alloc : AttachInfo.NewAllocations) { + for ([[maybe_unused]] const auto &Alloc : AttachInfo.NewAllocations) { DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n", DPxPTR(Alloc.first), Alloc.second); } diff 
--git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 3ea846e..4d279bf 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -996,6 +996,8 @@ cc_library( srcs = glob([ "lib/Analysis/FlowSensitive/Models/*.cpp", "lib/Analysis/FlowSensitive/*.cpp", + "lib/Analysis/LifetimeSafety/*.cpp", + "lib/Analysis/LifetimeSafety/*.h", "lib/Analysis/*.cpp", ]) + [ ":analysis_htmllogger_gen", |